前言
我在Internet上浏览了很多遍,找到了很多有关哈希表的有趣文章,但是我没有找到关于它们如何实现的可理解的完整描述。在这方面,我迫不及待想写一个关于这个如此有趣的主题的文章。
也许对于经验丰富的程序员而言,它并没有那么有用,但是对于技术大学的学生和新手自学的程序员而言,它将会很有趣。
使用哈希表的动机
.
\ | insert | remove | find |
---|---|---|---|
Array | O(N) | O(N) | O(N) |
List | O(1) | O(1) | O(N) |
Sorted array | O(N) | O(N) | O(logN) |
O(logN) | O(logN) | O(logN) | |
- | O(1) | O(1) | O(1) |
, -
, -. : ?
: , , : , . - , , , .
-
- — . , , , .
. , . () . O(1) .
, -.
, , , , , . , , - .
: . , , , .
( ) -, .
- ( g) s, . , , g s . , ? -, t — , , g.
s, s + t, s + 2*t .. , , ( ).
-
-, .
-, . - , . , , -.
int HashFunctionHorner(const std::string& s, int table_size, const int key)
{
int hash_result = 0;
for (int i = 0; s[i] != 0; ++i)
hash_result = (key * hash_result + s[i]) % table_size;
hash_result = (hash_result * 2 + 1) % table_size;
return hash_result;
}
struct HashFunction1
{
int operator()(const std::string& s, int table_size) const
{
return HashFunctionHorner(s, table_size, table_size - 1); // , < > .
}
};
struct HashFunction2
{
int operator()(const std::string& s, int table_size) const
{
return HashFunctionHorner(s, table_size, table_size + 1);
}
};
, : , ? , deleted, . , ( - ) . , , , , - . .
.
template <class T, class THash1 = HashFunction1, class THash2 = HashFunction2>
class HashTable
{
static const int default_size = 8; //
constexpr static const double rehash_size = 0.75; // ,
struct Node
{
T value;
bool state; // state = false, (deleted)
Node(const T& value_) : value(value_), state(true) {}
};
Node** arr; // Node*
int size; // ( deleted)
int buffer_size; // ,
int size_all_non_nullptr; // ( deleted)
};
- , . .
...
public:
HashTable()
{
buffer_size = default_size;
size = 0;
size_all_non_nullptr = 0;
arr = new Node*[buffer_size];
for (int i = 0; i < buffer_size; ++i)
arr[i] = nullptr; // nullptr - ,
}
~HashTable()
{
for (int i = 0; i < buffer_size; ++i)
if (arr[i])
delete arr[i];
delete[] arr;
}
, — Resize.
.
void Resize()
{
int past_buffer_size = buffer_size;
buffer_size *= 2;
size_all_non_nullptr = 0;
size = 0;
Node** arr2 = new Node * [buffer_size];
for (int i = 0; i < buffer_size; ++i)
arr2[i] = nullptr;
std::swap(arr, arr2);
for (int i = 0; i < past_buffer_size; ++i)
{
if (arr2[i] && arr2[i]->state)
Add(arr2[i]->value); //
}
//
for (int i = 0; i < past_buffer_size; ++i)
if (arr2[i])
delete arr2[i];
delete[] arr2;
}
O(1) . ? (deleted). , , , . . - ( , ).
, 50, Rehash, , (resize), . , , . - , . deleted- , , .
, :
void Rehash()
{
size_all_non_nullptr = 0;
size = 0;
Node** arr2 = new Node * [buffer_size];
for (int i = 0; i < buffer_size; ++i)
arr2[i] = nullptr;
std::swap(arr, arr2);
for (int i = 0; i < buffer_size; ++i)
{
if (arr2[i] && arr2[i]->state)
Add(arr2[i]->value);
}
//
for (int i = 0; i < buffer_size; ++i)
if (arr2[i])
delete arr2[i];
delete[] arr2;
}
, , , . (Add), (Remove) (Find) .
— Find .
bool Find(const T& value, const THash1& hash1 = THash1(), const THash2& hash2 = THash2())
{
int h1 = hash1(value, buffer_size); // ,
int h2 = hash2(value, buffer_size); // , ""
int i = 0;
while (arr[h1] != nullptr && i < buffer_size)
{
if (arr[h1]->value == value && arr[h1]->state)
return true; //
h1 = (h1 + h2) % buffer_size;
++i; // i >= buffer_size, , i, .
}
return false;
}
— Remove. ? ( Find), , state false, Node .
bool Remove(const T& value, const THash1& hash1 = THash1(), const THash2& hash2 = THash2())
{
int h1 = hash1(value, buffer_size);
int h2 = hash2(value, buffer_size);
int i = 0;
while (arr[h1] != nullptr && i < buffer_size)
{
if (arr[h1]->value == value && arr[h1]->state)
{
arr[h1]->state = false;
--size;
return true;
}
h1 = (h1 + h2) % buffer_size;
++i;
}
return false;
}
Add. . .
, . ( deleted). , - . deleted- , Node .
bool Add(const T& value, const THash1& hash1 = THash1(),const THash2& hash2 = THash2())
{
if (size + 1 > int(rehash_size * buffer_size))
Resize();
else if (size_all_non_nullptr > 2 * size)
Rehash(); // , deleted-
int h1 = hash1(value, buffer_size);
int h2 = hash2(value, buffer_size);
int i = 0;
int first_deleted = -1; // ()
while (arr[h1] != nullptr && i < buffer_size)
{
if (arr[h1]->value == value && arr[h1]->state)
return false; // ,
if (!arr[h1]->state && first_deleted == -1) //
first_deleted = h1;
h1 = (h1 + h2) % buffer_size;
++i;
}
if (first_deleted == -1) // , Node
{
arr[h1] = new Node(value);
++size_all_non_nullptr; // , ,
}
else
{
arr[first_deleted]->value = value;
arr[first_deleted]->state = true;
}
++size; //
return true;
}
int HashFunctionHorner(const std::string& s, int table_size, const int key)
{
int hash_result = 0;
for (int i = 0; s[i] != 0; ++i)
{
hash_result = (key * hash_result + s[i]) % table_size;
}
hash_result = (hash_result * 2 + 1) % table_size;
return hash_result;
}
struct HashFunction1
{
int operator()(const std::string& s, int table_size) const
{
return HashFunctionHorner(s, table_size, table_size - 1);
}
};
struct HashFunction2
{
int operator()(const std::string& s, int table_size) const
{
return HashFunctionHorner(s, table_size, table_size + 1);
}
};
template <class T, class THash1 = HashFunction1, class THash2 = HashFunction2>
class HashTable
{
static const int default_size = 8;
constexpr static const double rehash_size = 0.75;
struct Node
{
T value;
bool state;
Node(const T& value_) : value(value_), state(true) {}
};
Node** arr;
int size;
int buffer_size;
int size_all_non_nullptr;
void Resize()
{
int past_buffer_size = buffer_size;
buffer_size *= 2;
size_all_non_nullptr = 0;
size = 0;
Node** arr2 = new Node * [buffer_size];
for (int i = 0; i < buffer_size; ++i)
arr2[i] = nullptr;
std::swap(arr, arr2);
for (int i = 0; i < past_buffer_size; ++i)
{
if (arr2[i] && arr2[i]->state)
Add(arr2[i]->value);
}
for (int i = 0; i < past_buffer_size; ++i)
if (arr2[i])
delete arr2[i];
delete[] arr2;
}
void Rehash()
{
size_all_non_nullptr = 0;
size = 0;
Node** arr2 = new Node * [buffer_size];
for (int i = 0; i < buffer_size; ++i)
arr2[i] = nullptr;
std::swap(arr, arr2);
for (int i = 0; i < buffer_size; ++i)
{
if (arr2[i] && arr2[i]->state)
Add(arr2[i]->value);
}
for (int i = 0; i < buffer_size; ++i)
if (arr2[i])
delete arr2[i];
delete[] arr2;
}
public:
HashTable()
{
buffer_size = default_size;
size = 0;
size_all_non_nullptr = 0;
arr = new Node*[buffer_size];
for (int i = 0; i < buffer_size; ++i)
arr[i] = nullptr;
}
~HashTable()
{
for (int i = 0; i < buffer_size; ++i)
if (arr[i])
delete arr[i];
delete[] arr;
}
bool Add(const T& value, const THash1& hash1 = THash1(),const THash2& hash2 = THash2())
{
if (size + 1 > int(rehash_size * buffer_size))
Resize();
else if (size_all_non_nullptr > 2 * size)
Rehash();
int h1 = hash1(value, buffer_size);
int h2 = hash2(value, buffer_size);
int i = 0;
int first_deleted = -1;
while (arr[h1] != nullptr && i < buffer_size)
{
if (arr[h1]->value == value && arr[h1]->state)
return false;
if (!arr[h1]->state && first_deleted == -1)
first_deleted = h1;
h1 = (h1 + h2) % buffer_size;
++i;
}
if (first_deleted == -1)
{
arr[h1] = new Node(value);
++size_all_non_nullptr;
}
else
{
arr[first_deleted]->value = value;
arr[first_deleted]->state = true;
}
++size;
return true;
}
bool Remove(const T& value, const THash1& hash1 = THash1(), const THash2& hash2 = THash2())
{
int h1 = hash1(value, buffer_size);
int h2 = hash2(value, buffer_size);
int i = 0;
while (arr[h1] != nullptr && i < buffer_size)
{
if (arr[h1]->value == value && arr[h1]->state)
{
arr[h1]->state = false;
--size;
return true;
}
h1 = (h1 + h2) % buffer_size;
++i;
}
return false;
}
bool Find(const T& value, const THash1& hash1 = THash1(), const THash2& hash2 = THash2())
{
int h1 = hash1(value, buffer_size);
int h2 = hash2(value, buffer_size);
int i = 0;
while (arr[h1] != nullptr && i < buffer_size)
{
if (arr[h1]->value == value && arr[h1]->state)
return true;
h1 = (h1 + h2) % buffer_size;
++i;
}
return false;
}
};