/*
Copyright (c) Microsoft Corporation

All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0

THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.

See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.
*/

/* (c) Microsoft Corporation. All rights reserved. */
|
|
#pragma once
|
|
#include <string.h>
|
|
#include "basic_types.h"
|
|
|
|
/* Drop any earlier definition of IndexAssert and forward it to LogAssert. */
#undef IndexAssert
#define IndexAssert(expr) LogAssert(expr)
|
|
typedef __int64 Dryad_dupelim_fprint_int64_t;
|
|
typedef UInt64 Dryad_dupelim_fprint_uint64_t;
|
|
|
|
/* the type of a 64-bit fingerprint */
|
|
typedef Dryad_dupelim_fprint_uint64_t Dryad_dupelim_fprint_t;
|
|
|
|
/* An opaque type used to keep the data structures needed to compute
   fingerprints.  Dryad_dupelim_fprint_data_t is a pointer through which the
   data may be modified; Dryad_dupelim_fprint_data_tc is a pointer to const
   data. */
typedef struct Dryad_dupelim_fprint_data_s *Dryad_dupelim_fprint_data_t;
typedef const struct Dryad_dupelim_fprint_data_s *Dryad_dupelim_fprint_data_tc;
|
|
|
|
/* Hash lengths: selects the order (8, 16, 32 or 64 bits) of the polynomial
   used by a Rabin hash function. */
enum HashPolyLength {
    Poly8bit,
    Poly16bit,
    Poly32bit,
    Poly64bit
};
|
|
|
|
/* Allocate and return a new Rabin fingerprint function.
|
|
Rabin fingerprint belongs to the family of CRC hashes
|
|
Its collusion is bounded by a very small number
|
|
Since it employs polynomials in a galois field, it is very
|
|
efficient in calculating recursive hashes
|
|
|
|
|
|
for straight-forward applications, use Dryad_dupelim_rabinhash_create() and
|
|
Dryad_dupelim_rabinhash_process() in your applications. unless you
|
|
understand what the other functions exactly do, refrain from using them.
|
|
|
|
Dryad_dupelim_rabinhash_new()----------------------
|
|
returns true if a hash function is created,
|
|
fprint data structure should already been allocated
|
|
returns a pointer to the hash function created on pHashfunction
|
|
hashLen is the order of polynomials to be used for the hash function
|
|
seed is the index of the polynomial to be used in the hash function
|
|
seed has to be less than or equal to cbPolysN {N = 8 , 16, 32, 64}
|
|
otherwise, Dryad_dupelim_rabinhash_new will return false
|
|
*/
|
|
bool Dryad_dupelim_rabinhash_init (Dryad_dupelim_fprint_data_s* pHashData,
|
|
HashPolyLength hashLen,
|
|
UInt32 seed);
|
|
/* if fp was generated with polynomial P,bytes "data[0, ..., len-1]"
|
|
contain string A, return the fingerprint under P of A.
|
|
Strings are treated as polynomials. The low-order bit in the first
|
|
byte is the highest degree coefficient in the polynomial.
|
|
*/
|
|
Dryad_dupelim_fprint_t Dryad_dupelim_rabinhash_process(Dryad_dupelim_fprint_data_s* pHashData,
|
|
const unsigned char *data, unsigned len);
|
|
|
|
/* if fp was generated with polynomial P,bytes "data[0, ..., len-1]"
|
|
contain string B, and initialHash contains the hash value for string A
|
|
return the fingerprint under P of A added to initialHash.
|
|
the output value is merely the hash of string A concat string B.
|
|
Strings are treated as polynomials. The low-order bit in the first
|
|
byte is the highest degree coefficient in the polynomial.
|
|
*/
|
|
|
|
Dryad_dupelim_fprint_t Dryad_dupelim_rabinhash_add(Dryad_dupelim_fprint_data_s* pHashFunction, Dryad_dupelim_fprint_t initialHash,
|
|
const unsigned char *data, unsigned len);
|
|
|
|
|
|
/* Allocate and return a new fingerprint function.
|
|
|
|
Computes the tables needed for fingerprint manipulations.
|
|
Requires that "poly" be the binary representation
|
|
of an irreducible polynomial in GF(2) of degree 64. The X^64 term
|
|
is not represented. The X^0 term is the high order bit, and the
|
|
X^63 term is the low-order bit.
|
|
|
|
span is used in later calls to Dryad_dupelim_fprint_slide_word().
|
|
If Dryad_dupelim_fprint_slide_word() is not to be called, span
|
|
should be set to zero. */
|
|
Dryad_dupelim_fprint_data_t Dryad_dupelim_fprint_new (Dryad_dupelim_fprint_t poly,
|
|
unsigned span);
|
|
|
|
/* Like "new" above, except that the degree can be any value between 1
|
|
and 64. Return 0 if that's not true.
|
|
|
|
The X^(degree-1) term is in the low-order bit of poly.
|
|
*/
|
|
Dryad_dupelim_fprint_data_t Dryad_dupelim_fprint_new2 (Dryad_dupelim_fprint_t poly,
|
|
unsigned span, int degree);
|
|
|
|
/* returns the seeded polynomial ie. fingerprint of an empty element under this fp */
|
|
Dryad_dupelim_fprint_t Dryad_dupelim_fprint_empty (Dryad_dupelim_fprint_data_tc fp);
|
|
|
|
/* if fp was generated with polynomial P, "a" is the fingerprint under
|
|
P of string A, and bytes "data[0, ..., len-1]" contain string B,
|
|
return the fingerprint under P of the concatenation of A and B.
|
|
Strings are treated as polynomials. The low-order bit in the first
|
|
byte is the highest degree coefficient in the polynomial. This
|
|
routine differs from Dryad_dupelim_fprint_extend_word() in that it
|
|
will read bytes in increasing address order, regardless of the
|
|
endianness of the machine.
|
|
data's length is the number of unsigned chars
|
|
*/
|
|
Dryad_dupelim_fprint_t Dryad_dupelim_fprint_extend (Dryad_dupelim_fprint_data_tc fp,
|
|
Dryad_dupelim_fprint_t a,
|
|
const unsigned char *data, unsigned len);
|
|
|
|
/* If fp was generated with polynomial P, "a" is the fingerprint under
|
|
P of string A, and 64-bit words "data[0, ..., len-1]" contain
|
|
string B, return the fingerprint under P of the concatenation of A
|
|
and B. Arrays of words are treated as polynomials. The low-order
|
|
bit in the first word is the highest degree coefficient in the
|
|
polynomial. This routine differs from Dryad_dupelim_fprint_extend()
|
|
on bigendian machines, where the byte order within each word is
|
|
backwards. */
|
|
Dryad_dupelim_fprint_t
|
|
Dryad_dupelim_fprint_extend_word (Dryad_dupelim_fprint_data_tc fp,
|
|
Dryad_dupelim_fprint_t a,
|
|
const Dryad_dupelim_fprint_uint64_t *data,
|
|
unsigned len);
|
|
|
|
/* if fp was generated with polynomial P, "a" is the fingerprint under
|
|
P of string A, and "b" is the fingerprint under P of string B,
|
|
which has length "blen" bytes, return the fingerprint under P of
|
|
the concatenation of A and B */
|
|
Dryad_dupelim_fprint_t
|
|
Dryad_dupelim_fprint_concat(Dryad_dupelim_fprint_data_tc fp,
|
|
Dryad_dupelim_fprint_t a, Dryad_dupelim_fprint_t
|
|
b, Dryad_dupelim_fprint_t blen);
|
|
|
|
|
|
/* Turn fingerprint "f" into a hexadecimal, ascii-zero-filled
|
|
printable string S of length 16, and place the characters in
|
|
buf[0,...,15]. No null terminator is written by the routine. */
|
|
void Dryad_dupelim_fprint_toascii (Dryad_dupelim_fprint_t f, char *buf);
|
|
|
|
/* if "fp" was generated with polynomial P, X is some string of length
|
|
"(span-1)*sizeof (Dryad_dupelim_fprint_uint64_t)" bytes (see
|
|
Dryad_dupelim_fprint_new()), "f" is the fingerprint under P of word
|
|
"a" concatenated with X, return the fingerprint under P of X
|
|
concatenated with word "b". The words "a" and "b" represent
|
|
polynomials whose X^0 term is in the high-order bit, and whose X^63
|
|
term is in the low order bit. */
|
|
Dryad_dupelim_fprint_t
|
|
Dryad_dupelim_fprint_slideword (Dryad_dupelim_fprint_data_tc fp,
|
|
Dryad_dupelim_fprint_t f,
|
|
Dryad_dupelim_fprint_uint64_t a,
|
|
Dryad_dupelim_fprint_uint64_t b);
|
|
|
|
/* discard the data associated with "fp" */
|
|
void Dryad_dupelim_fprint_close (Dryad_dupelim_fprint_data_t fp);
|
|
|
|
/* fprint struct */
|
|
struct Dryad_dupelim_fprint_data_s {
|
|
Dryad_dupelim_fprint_t poly[2];
|
|
/* poly[0] = 0; poly[1] = polynomial */
|
|
Dryad_dupelim_fprint_t empty;
|
|
/* fingerprint of the empty string */
|
|
Dryad_dupelim_fprint_t bybyte[8][256];
|
|
/* bybyte[b][i] is i*X^(degree+8*b) mod poly[1] */
|
|
Dryad_dupelim_fprint_t powers[64];
|
|
/* extend[i] is X^(8*2^i) mod poly[1] */
|
|
static const UInt32 LOGZEROBLOCK = 8;
|
|
static const UInt32 ZEROBLOCK = (1 << LOGZEROBLOCK);
|
|
union {
|
|
double align;
|
|
unsigned char zeroes[ZEROBLOCK];
|
|
} zeroes;
|
|
Dryad_dupelim_fprint_t bybyte_out[8][256];
|
|
/* bybyte_out[b][i] is i*X^(degree+8*(b+span)) mod poly[1] */
|
|
unsigned span;
|
|
};
|