KHtml

StringHash.h
1 /*
2  * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public License
15  * along with this library; see the file COPYING.LIB. If not, write to
16  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17  * Boston, MA 02110-1301, USA.
18  *
19  */
20 
21 #ifndef StringHash_h
22 #define StringHash_h
23 
24 #include "AtomicStringImpl.h"
25 #include "dom/dom_string.h"
26 #include "xml/dom_stringimpl.h"
27 #include <wtf/HashTraits.h>
28 
29 using DOM::DOMString;
30 using DOM::DOMStringImpl;
31 
32 namespace khtml
33 {
34 
35 // FIXME: We should really figure out a way to put the computeHash function that's
36 // currently a member function of DOMStringImpl into this file so we can be a little
37 // closer to having all the nearly-identical hash functions in one place.
38 
39 struct StringHash {
40  static unsigned hash(DOMStringImpl *key)
41  {
42  return key->hash();
43  }
44  static bool equal(DOMStringImpl *a, DOMStringImpl *b)
45  {
46  if (a == b) {
47  return true;
48  }
49  if (!a || !b) {
50  return false;
51  }
52 
53  unsigned aLength = a->length();
54  unsigned bLength = b->length();
55  if (aLength != bLength) {
56  return false;
57  }
58 
59  const uint32_t *aChars = reinterpret_cast<const uint32_t *>(a->unicode());
60  const uint32_t *bChars = reinterpret_cast<const uint32_t *>(b->unicode());
61 
62  unsigned halfLength = aLength >> 1;
63  for (unsigned i = 0; i != halfLength; ++i)
64  if (*aChars++ != *bChars++) {
65  return false;
66  }
67 
68  if (aLength & 1 && *reinterpret_cast<const uint16_t *>(aChars) != *reinterpret_cast<const uint16_t *>(bChars)) {
69  return false;
70  }
71 
72  return true;
73  }
74 
75  static unsigned hash(const RefPtr<DOMStringImpl> &key)
76  {
77  return key->hash();
78  }
79  static bool equal(const RefPtr<DOMStringImpl> &a, const RefPtr<DOMStringImpl> &b)
80  {
81  return equal(a.get(), b.get());
82  }
83 
84  static unsigned hash(const DOMString &key)
85  {
86  return key.implementation()->hash();
87  }
88  static bool equal(const DOMString &a, const DOMString &b)
89  {
90  return equal(a.implementation(), b.implementation());
91  }
92 
93  static const bool safeToCompareToEmptyOrDeleted = false;
94 };
95 
96 class CaseFoldingHash
97 {
98 private:
99  // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
100  static const unsigned PHI = 0x9e3779b9U;
101 public:
102  // Paul Hsieh's SuperFastHash
103  // http://www.azillionmonkeys.com/qed/hash.html
104  static unsigned hash(const QChar *data, unsigned length)
105  {
106  unsigned l = length;
107  const QChar *s = data;
108  uint32_t hash = PHI;
109  uint32_t tmp;
110 
111  int rem = l & 1;
112  l >>= 1;
113 
114  // Main loop.
115  for (; l > 0; l--) {
116  hash += s[0].toCaseFolded().unicode();
117  tmp = (s[1].toCaseFolded().unicode() << 11) ^ hash;
118  hash = (hash << 16) ^ tmp;
119  s += 2;
120  hash += hash >> 11;
121  }
122 
123  // Handle end case.
124  if (rem) {
125  hash += s[0].toCaseFolded().unicode();
126  hash ^= hash << 11;
127  hash += hash >> 17;
128  }
129 
130  // Force "avalanching" of final 127 bits.
131  hash ^= hash << 3;
132  hash += hash >> 5;
133  hash ^= hash << 2;
134  hash += hash >> 15;
135  hash ^= hash << 10;
136 
137  // This avoids ever returning a hash code of 0, since that is used to
138  // signal "hash not computed yet", using a value that is likely to be
139  // effectively the same as 0 when the low bits are masked.
140  hash |= !hash << 31;
141 
142  return hash;
143  }
144 
145  static unsigned hash(DOMStringImpl *str)
146  {
147  return hash(str->unicode(), str->length());
148  }
149 
150  static unsigned hash(const char *str, unsigned length)
151  {
152  // This hash is designed to work on 16-bit chunks at a time. But since the normal case
153  // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
154  // were 16-bit chunks, which will give matching results.
155 
156  unsigned l = length;
157  const char *s = str;
158  uint32_t hash = PHI;
159  uint32_t tmp;
160 
161  int rem = l & 1;
162  l >>= 1;
163 
164  // Main loop
165  for (; l > 0; l--) {
166  hash += QChar::toCaseFolded((unsigned int)s[0]);
167  tmp = (QChar::toCaseFolded((unsigned int)s[1]) << 11) ^ hash;
168  hash = (hash << 16) ^ tmp;
169  s += 2;
170  hash += hash >> 11;
171  }
172 
173  // Handle end case
174  if (rem) {
175  hash += QChar::toCaseFolded((unsigned int)s[0]);
176  hash ^= hash << 11;
177  hash += hash >> 17;
178  }
179 
180  // Force "avalanching" of final 127 bits
181  hash ^= hash << 3;
182  hash += hash >> 5;
183  hash ^= hash << 2;
184  hash += hash >> 15;
185  hash ^= hash << 10;
186 
187  // this avoids ever returning a hash code of 0, since that is used to
188  // signal "hash not computed yet", using a value that is likely to be
189  // effectively the same as 0 when the low bits are masked
190  hash |= !hash << 31;
191 
192  return hash;
193  }
194 
195  static bool equal(DOMStringImpl *a, DOMStringImpl *b)
196  {
197  if (a == b) {
198  return true;
199  }
200  if (!a || !b) {
201  return false;
202  }
203  unsigned length = a->length();
204  if (length != b->length()) {
205  return false;
206  }
207  for (unsigned i = 0; i < length; ++i)
208  if (a->unicode()[i].toCaseFolded() != b->unicode()[i].toCaseFolded()) {
209  return false;
210  }
211  return true;
212  }
213 
214  static unsigned hash(const RefPtr<DOMStringImpl> &key)
215  {
216  return hash(key.get());
217  }
218 
219  static bool equal(const RefPtr<DOMStringImpl> &a, const RefPtr<DOMStringImpl> &b)
220  {
221  return equal(a.get(), b.get());
222  }
223 
224  static unsigned hash(const DOMString &key)
225  {
226  return hash(key.implementation());
227  }
228  static bool equal(const DOMString &a, const DOMString &b)
229  {
230  return equal(a.implementation(), b.implementation());
231  }
232 
233  static const bool safeToCompareToEmptyOrDeleted = false;
234 };
235 
236 // This hash can be used in cases where the key is a hash of a string, but we don't
237 // want to store the string. It's not really specific to string hashing, but all our
238 // current uses of it are for strings.
239 struct AlreadyHashed : IntHash<unsigned> {
240  static unsigned hash(unsigned key)
241  {
242  return key;
243  }
244 
245  // To use a hash value as a key for a hash table, we need to eliminate the
246  // "deleted" value, which is negative one. That could be done by changing
247  // the string hash function to never generate negative one, but this works
248  // and is still relatively efficient.
249  static unsigned avoidDeletedValue(unsigned hash)
250  {
251  ASSERT(hash);
252  unsigned newHash = hash | (!(hash + 1) << 31);
253  ASSERT(newHash);
254  ASSERT(newHash != 0xFFFFFFFF);
255  return newHash;
256  }
257 };
258 
259 }
260 
261 namespace WTF
262 {
263 
264 /*template<> struct HashTraits<DOM::DOMString> : GenericHashTraits<DOM::DOMString> {
265  static const bool emptyValueIsZero = true;
266  static void constructDeletedValue(DOM::DOMString& slot) { new (&slot) DOM::DOMString(HashTableDeletedValue); }
267  static bool isDeletedValue(const DOM::DOMString& slot) { return slot.isHashTableDeletedValue(); }
268 };*/
269 
270 }
271 
272 #endif
273 
This file is part of the HTML rendering engine for KDE.
QChar toCaseFolded() const const
This class implements the basic string we use in the DOM.
Definition: dom_string.h:44
ushort unicode() const const
DOMStringImpl * implementation() const
Definition: dom_string.h:145
Definition: css_base.h:371
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Sat Oct 16 2021 22:48:01 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.