1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package maphash provides hash functions on byte sequences. 6 // These hash functions are intended to be used to implement hash tables or 7 // other data structures that need to map arbitrary strings or byte 8 // sequences to a uniform distribution on unsigned 64-bit integers. 9 // Each different instance of a hash table or data structure should use its own Seed. 10 // 11 // The hash functions are not cryptographically secure. 12 // (See crypto/sha256 and crypto/sha512 for cryptographic use.) 13 package maphash 14 15 import ( 16 "internal/unsafeheader" 17 "unsafe" 18 ) 19 20 // A Seed is a random value that selects the specific hash function 21 // computed by a Hash. If two Hashes use the same Seeds, they 22 // will compute the same hash values for any given input. 23 // If two Hashes use different Seeds, they are very likely to compute 24 // distinct hash values for any given input. 25 // 26 // A Seed must be initialized by calling MakeSeed. 27 // The zero seed is uninitialized and not valid for use with Hash's SetSeed method. 28 // 29 // Each Seed value is local to a single process and cannot be serialized 30 // or otherwise recreated in a different process. 31 type Seed struct { 32 s uint64 33 } 34 35 // Bytes returns the hash of b with the given seed. 36 // 37 // Bytes is equivalent to, but more convenient and efficient than: 38 // 39 // var h Hash 40 // h.SetSeed(seed) 41 // h.Write(b) 42 // return h.Sum64() 43 func Bytes(seed Seed, b []byte) uint64 { 44 state := seed.s 45 if state == 0 { 46 panic("maphash: use of uninitialized Seed") 47 } 48 if len(b) == 0 { 49 return rthash(nil, 0, state) // avoid &b[0] index panic below 50 } 51 if len(b) > bufSize { 52 b = b[:len(b):len(b)] // merge len and cap calculations when reslicing 53 for len(b) > bufSize { 54 state = rthash(&b[0], bufSize, state) 55 b = b[bufSize:] 56 } 57 } 58 return rthash(&b[0], len(b), state) 59 } 60 61 // String returns the hash of s with the given seed. 62 // 63 // String is equivalent to, but more convenient and efficient than: 64 // 65 // var h Hash 66 // h.SetSeed(seed) 67 // h.WriteString(s) 68 // return h.Sum64() 69 func String(seed Seed, s string) uint64 { 70 state := seed.s 71 if state == 0 { 72 panic("maphash: use of uninitialized Seed") 73 } 74 for len(s) > bufSize { 75 p := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data) 76 state = rthash(p, bufSize, state) 77 s = s[bufSize:] 78 } 79 p := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data) 80 return rthash(p, len(s), state) 81 } 82 83 // A Hash computes a seeded hash of a byte sequence. 84 // 85 // The zero Hash is a valid Hash ready to use. 86 // A zero Hash chooses a random seed for itself during 87 // the first call to a Reset, Write, Seed, or Sum64 method. 88 // For control over the seed, use SetSeed. 89 // 90 // The computed hash values depend only on the initial seed and 91 // the sequence of bytes provided to the Hash object, not on the way 92 // in which the bytes are provided. For example, the three sequences 93 // 94 // h.Write([]byte{'f','o','o'}) 95 // h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') 96 // h.WriteString("foo") 97 // 98 // all have the same effect. 99 // 100 // Hashes are intended to be collision-resistant, even for situations 101 // where an adversary controls the byte sequences being hashed. 102 // 103 // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. 104 // If multiple goroutines must compute the same seeded hash, 105 // each can declare its own Hash and call SetSeed with a common Seed. 106 type Hash struct { 107 _ [0]func() // not comparable 108 seed Seed // initial seed used for this hash 109 state Seed // current hash of all flushed bytes 110 buf [bufSize]byte // unflushed byte buffer 111 n int // number of unflushed bytes 112 } 113 114 // bufSize is the size of the Hash write buffer. 115 // The buffer ensures that writes depend only on the sequence of bytes, 116 // not the sequence of WriteByte/Write/WriteString calls, 117 // by always calling rthash with a full buffer (except for the tail). 118 const bufSize = 128 119 120 // initSeed seeds the hash if necessary. 121 // initSeed is called lazily before any operation that actually uses h.seed/h.state. 122 // Note that this does not include Write/WriteByte/WriteString in the case 123 // where they only add to h.buf. (If they write too much, they call h.flush, 124 // which does call h.initSeed.) 125 func (h *Hash) initSeed() { 126 if h.seed.s == 0 { 127 seed := MakeSeed() 128 h.seed = seed 129 h.state = seed 130 } 131 } 132 133 // WriteByte adds b to the sequence of bytes hashed by h. 134 // It never fails; the error result is for implementing io.ByteWriter. 135 func (h *Hash) WriteByte(b byte) error { 136 if h.n == len(h.buf) { 137 h.flush() 138 } 139 h.buf[h.n] = b 140 h.n++ 141 return nil 142 } 143 144 // Write adds b to the sequence of bytes hashed by h. 145 // It always writes all of b and never fails; the count and error result are for implementing io.Writer. 146 func (h *Hash) Write(b []byte) (int, error) { 147 size := len(b) 148 // Deal with bytes left over in h.buf. 149 // h.n <= bufSize is always true. 150 // Checking it is ~free and it lets the compiler eliminate a bounds check. 151 if h.n > 0 && h.n <= bufSize { 152 k := copy(h.buf[h.n:], b) 153 h.n += k 154 if h.n < bufSize { 155 // Copied the entirety of b to h.buf. 156 return size, nil 157 } 158 b = b[k:] 159 h.flush() 160 // No need to set h.n = 0 here; it happens just before exit. 161 } 162 // Process as many full buffers as possible, without copying, and calling initSeed only once. 163 if len(b) > bufSize { 164 h.initSeed() 165 for len(b) > bufSize { 166 h.state.s = rthash(&b[0], bufSize, h.state.s) 167 b = b[bufSize:] 168 } 169 } 170 // Copy the tail. 171 copy(h.buf[:], b) 172 h.n = len(b) 173 return size, nil 174 } 175 176 // WriteString adds the bytes of s to the sequence of bytes hashed by h. 177 // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter. 178 func (h *Hash) WriteString(s string) (int, error) { 179 // WriteString mirrors Write. See Write for comments. 180 size := len(s) 181 if h.n > 0 && h.n <= bufSize { 182 k := copy(h.buf[h.n:], s) 183 h.n += k 184 if h.n < bufSize { 185 return size, nil 186 } 187 s = s[k:] 188 h.flush() 189 } 190 if len(s) > bufSize { 191 h.initSeed() 192 for len(s) > bufSize { 193 ptr := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data) 194 h.state.s = rthash(ptr, bufSize, h.state.s) 195 s = s[bufSize:] 196 } 197 } 198 copy(h.buf[:], s) 199 h.n = len(s) 200 return size, nil 201 } 202 203 // Seed returns h's seed value. 204 func (h *Hash) Seed() Seed { 205 h.initSeed() 206 return h.seed 207 } 208 209 // SetSeed sets h to use seed, which must have been returned by MakeSeed 210 // or by another Hash's Seed method. 211 // Two Hash objects with the same seed behave identically. 212 // Two Hash objects with different seeds will very likely behave differently. 213 // Any bytes added to h before this call will be discarded. 214 func (h *Hash) SetSeed(seed Seed) { 215 if seed.s == 0 { 216 panic("maphash: use of uninitialized Seed") 217 } 218 h.seed = seed 219 h.state = seed 220 h.n = 0 221 } 222 223 // Reset discards all bytes added to h. 224 // (The seed remains the same.) 225 func (h *Hash) Reset() { 226 h.initSeed() 227 h.state = h.seed 228 h.n = 0 229 } 230 231 // precondition: buffer is full. 232 func (h *Hash) flush() { 233 if h.n != len(h.buf) { 234 panic("maphash: flush of partially full buffer") 235 } 236 h.initSeed() 237 h.state.s = rthash(&h.buf[0], h.n, h.state.s) 238 h.n = 0 239 } 240 241 // Sum64 returns h's current 64-bit value, which depends on 242 // h's seed and the sequence of bytes added to h since the 243 // last call to Reset or SetSeed. 244 // 245 // All bits of the Sum64 result are close to uniformly and 246 // independently distributed, so it can be safely reduced 247 // by using bit masking, shifting, or modular arithmetic. 248 func (h *Hash) Sum64() uint64 { 249 h.initSeed() 250 return rthash(&h.buf[0], h.n, h.state.s) 251 } 252 253 // MakeSeed returns a new random seed. 254 func MakeSeed() Seed { 255 var s uint64 256 for { 257 s = runtime_fastrand64() 258 // We use seed 0 to indicate an uninitialized seed/hash, 259 // so keep trying until we get a non-zero seed. 260 if s != 0 { 261 break 262 } 263 } 264 return Seed{s: s} 265 } 266 267 //go:linkname runtime_fastrand64 runtime.fastrand64 268 func runtime_fastrand64() uint64 269 270 func rthash(ptr *byte, len int, seed uint64) uint64 { 271 if len == 0 { 272 return seed 273 } 274 // The runtime hasher only works on uintptr. For 64-bit 275 // architectures, we use the hasher directly. Otherwise, 276 // we use two parallel hashers on the lower and upper 32 bits. 277 if unsafe.Sizeof(uintptr(0)) == 8 { 278 return uint64(runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len))) 279 } 280 lo := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len)) 281 hi := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed>>32), uintptr(len)) 282 return uint64(hi)<<32 | uint64(lo) 283 } 284 285 //go:linkname runtime_memhash runtime.memhash 286 //go:noescape 287 func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr 288 289 // Sum appends the hash's current 64-bit value to b. 290 // It exists for implementing hash.Hash. 291 // For direct calls, it is more efficient to use Sum64. 292 func (h *Hash) Sum(b []byte) []byte { 293 x := h.Sum64() 294 return append(b, 295 byte(x>>0), 296 byte(x>>8), 297 byte(x>>16), 298 byte(x>>24), 299 byte(x>>32), 300 byte(x>>40), 301 byte(x>>48), 302 byte(x>>56)) 303 } 304 305 // Size returns h's hash value size, 8 bytes. 306 func (h *Hash) Size() int { return 8 } 307 308 // BlockSize returns h's block size. 309 func (h *Hash) BlockSize() int { return len(h.buf) } 310