/* Copyright (c) Microsoft Corporation All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. See the Apache Version 2.0 License for specific language governing permissions and limitations under the License. */ // // � Microsoft Corporation. All rights reserved. // using System; using System.IO; namespace Microsoft.Research.DryadLinq.Internal { /// /// A class to compute 64 bit Rabin fingerprints. /// internal class Hash64 { private const int LOGZEROBLOCK = 8; private const int ZEROBLOCK = 1 << LOGZEROBLOCK; internal const UInt64 Empty = 0x911498ae0e66bad6UL; internal static readonly Hash64 Hasher = new Hash64(Empty, 8); // poly[0] = 0; poly[1] = polynomial private UInt64[] poly = new UInt64[2]; // bybyte[b,i] is i*X^(64+8*b) mod poly[1] private UInt64[,] bybyte = new UInt64[8,256]; // extend[i] is X^(8*2^i) mod poly[1] private UInt64[] powers = new UInt64[64]; private byte[] zeroes = new byte[ZEROBLOCK]; // bybyte[b,i] is i*X^(64+8*(b+span)) mod poly[1] private UInt64[,] bybyte_out = new UInt64[8,256]; private int span; /// /// Computes the tables needed for fingerprint manipulations. /// Requires that "poly" be the binary representation /// of an irreducible polynomial in GF(2) of degree 64. The X^64 term /// is not represented. The X^0 term is the high order bit, and the /// X^63 term is the low-order bit. /// span is used in later calls to SlideWord(). If SlideWord() /// is not to be called, span should be set to zero. /// internal Hash64(UInt64 poly, int span) { this.poly[0] = 0; this.poly[1] = poly; // This must be initialized early on this.span = span; // bybyte[][] must be initialized before powers[] this.InitByByte(this.bybyte, poly); // zeroes must be initialized before powers[] for (int i = 0; i < this.zeroes.Length; i++) this.zeroes[i] = 0; // The initialization of powers[] must happen after bybyte[][] // and zeroes are initialized because concat uses all of // bybyte[][], zeroes and the prefix of powers[] internally. this.powers[0] = 1ul << 55; uint l = 1; for (int i = 1; i < this.powers.Length; i++, l <<= 1) { this.powers[i] = this.Concat(this.powers[i-1] ^ poly, 0, l); } if (span != 0) { this.InitByByte(this.bybyte_out, this.Concat(0, 0, (uint)(span-1) * 8)); } } private void InitByByte(UInt64[,] bybyte, UInt64 f) { for (int b = 0; b != 8; b++) { bybyte[b,0] = 0; for (int i = 0x80; i != 0; i >>= 1) { bybyte[b,i] = f; f = this.poly[f & 1] ^ (f >> 1); } for (int i = 1; i != 256; i <<= 1) { UInt64 xf = bybyte[b,i]; for (int k = 1; k != i; k++) { bybyte[b,i+k] = xf ^ bybyte[b,k]; } } } } /// /// If fp was generated with polynomial P, "a" is the fingerprint under /// P of string A, and 64-bit words "data[0, ..., len-1]" contain string /// B, return the fingerprint under P of the concatenation of A and B. /// Arrays of words are treated as polynomials. The low-order bit in /// the first word is the highest degree coefficient in the polynomial. /// This routine differs from Extend() on bigendian machines, where the /// byte order within each word is backwards. /// internal UInt64 ExtendWord(UInt64 fpa, UInt64[] data, int start, int len) { for (int i = start; i != start+len; i++) { fpa ^= data[i]; fpa = this.bybyte[7, fpa & 0xff] ^ this.bybyte[6, (fpa >> 8) & 0xff] ^ this.bybyte[5, (fpa >> 16) & 0xff] ^ this.bybyte[4, (fpa >> 24) & 0xff] ^ this.bybyte[3, (fpa >> 32) & 0xff] ^ this.bybyte[2, (fpa >> 40) & 0xff] ^ this.bybyte[1, (fpa >> 48) & 0xff] ^ this.bybyte[0, fpa >> 56]; } return fpa; } /// /// If fp was generated with polynomial P, "a" is the fingerprint under /// P of string A, and "b" is the fingerprint under P of string B, which /// has length "blen" bytes, return the fingerprint under P of the /// concatenation of A and B. /// internal UInt64 Concat(UInt64 a, UInt64 b, UInt64 blen) { UInt64 x = blen; int low = (int)(x & ((1 << LOGZEROBLOCK)-1)); a ^= this.poly[1]; if (low != 0) { a = this.Extend(a, this.zeroes, 0, low); } x >>= LOGZEROBLOCK; for (int i = LOGZEROBLOCK; x != 0; i++) { if ((x & 1) != 0) { UInt64 m = 0; UInt64 e = this.powers[i]; for (UInt64 bit = 1ul << 63; bit != 0; bit >>= 1) { if ((e & bit) != 0) { m ^= a; } a = (a >> 1) ^ this.poly[a & 1]; } a = m; } x >>= 1; } return a ^ b; } /// /// if "fp" was generated with polynomial P, X is some string of length /// "(span-1)*8" bytes (see the FingerPrint constructor), "fpa" is the /// fingerprint under P of word "a" concatenated with X, return the /// fingerprint under P of X concatenated with word "b". The words "a" /// and "b" represent polynomials whose X^0 term is in the high-order bit, /// and whose X^63 term is in the low order bit. /// internal UInt64 SlideWord(UInt64 fp, UInt64 a, UInt64 b) { a ^= this.poly[1] ^ (1ul << 63); fp ^= this.bybyte_out[7,a & 0xff] ^ this.bybyte_out[6,(a >> 8) & 0xff] ^ this.bybyte_out[5,(a >> 16) & 0xff] ^ this.bybyte_out[4,(a >> 24) & 0xff] ^ this.bybyte_out[3,(a >> 32) & 0xff] ^ this.bybyte_out[2,(a >> 40) & 0xff] ^ this.bybyte_out[1,(a >> 48) & 0xff] ^ this.bybyte_out[0,a >> 56]; fp ^= b; fp = this.bybyte[7,fp & 0xff] ^ this.bybyte[6,(fp >> 8) & 0xff] ^ this.bybyte[5,(fp >> 16) & 0xff] ^ this.bybyte[4,(fp >> 24) & 0xff] ^ this.bybyte[3,(fp >> 32) & 0xff] ^ this.bybyte[2,(fp >> 40) & 0xff] ^ this.bybyte[1,(fp >> 48) & 0xff] ^ this.bybyte[0,fp >> 56]; return fp; } /// /// if fp was generated with polynomial P, "fpa" is the fingerprint under /// P of string A, and bytes "data[start, ..., start+len-1]" contain /// string B, return the fingerprint under P of the concatenation of A /// and B. Strings are treated as polynomials. The low-order bit in /// the first byte is the highest degree coefficient in the polynomial. /// This routine differs from ExtendWord() in that it will read bytes /// in increasing address order, regardless of the endianness of the /// machine. /// internal UInt64 Extend(UInt64 fpa, byte[] data, int start, int len) { for (int i = 0; i < len; i++) { fpa = (fpa >> 8) ^ this.bybyte[0,(fpa & 0xff) ^ data[start++]]; } return fpa; } internal unsafe UInt64 Extend(UInt64 fpa, byte* data, int start, int len) { for (int i = 0; i < len; i++) { fpa = (fpa >> 8) ^ this.bybyte[0,(fpa & 0xff) ^ data[start++]]; } return fpa; } internal UInt64 Extend(UInt64 fp, byte b) { return (fp >> 8) ^ this.bybyte[0,(fp & 0xff) ^ b]; } internal UInt64 Extend(UInt64 fp, sbyte b) { return this.Extend(fp, (byte)b); } internal UInt64 Extend(UInt64 fp, bool b) { byte b1 = (byte)((b) ? 1 : 0); return (fp >> 8) ^ this.bybyte[0,(fp & 0xff) ^ b1]; } internal UInt64 Extend(UInt64 fp, char c) { return this.Extend(fp, (ushort)c); } internal UInt64 Extend(UInt64 fp, short v) { return this.Extend(fp, (ushort)v); } internal UInt64 Extend(UInt64 fp, ushort v) { fp ^= v; return ((fp >> 16) ^ this.bybyte[1, fp & 0xff] ^ this.bybyte[0, (fp >> 8) & 0xff]); } internal UInt64 Extend(UInt64 fp, int v) { return this.Extend(fp, (uint)v); } internal UInt64 Extend(UInt64 fp, uint v) { fp ^= v; return ((fp >> 32) ^ (this.bybyte[3, fp & 0xff] ^ this.bybyte[2, (fp >> 8) & 0xff] ^ this.bybyte[1, (fp >> 16) & 0xff] ^ this.bybyte[0, (fp >> 24) & 0xff])); } internal UInt64 Extend(UInt64 fp, long v) { return this.Extend(fp, (UInt64)v); } internal UInt64 Extend(UInt64 fp, UInt64 v) { fp ^= v; return (this.bybyte[7, fp & 0xff] ^ this.bybyte[6, (fp >> 8) & 0xff] ^ this.bybyte[5, (fp >> 16) & 0xff] ^ this.bybyte[4, (fp >> 24) & 0xff] ^ this.bybyte[3, (fp >> 32) & 0xff] ^ this.bybyte[2, (fp >> 40) & 0xff] ^ this.bybyte[1, (fp >> 48) & 0xff] ^ this.bybyte[0, (fp >> 56) & 0xff]); } internal unsafe UInt64 Extend(UInt64 fp, float v) { uint v1 = *(uint*)&v; return this.Extend(fp, v1); } internal unsafe UInt64 Extend(UInt64 fp, decimal v) { UInt64* vals = (UInt64*)&v; fp = this.Extend(fp, *vals); return this.Extend(fp, *(vals + 1)); } internal unsafe UInt64 Extend(UInt64 fp, double v) { UInt64 v1 = *(UInt64*)&v; return this.Extend(fp, v1); } internal UInt64 Extend(UInt64 fp, string s) { byte[] bytes = new byte[s.Length]; for (int i = 0; i < s.Length; i++) { bytes[i] = (byte)(s[i] & 0xff); } return this.Extend(fp, bytes, 0, bytes.Length); } internal UInt64 ExtendFile(UInt64 fp, string filename) { int size = 65536 * 4; byte[] readBuf = new byte[size]; byte[] fpBuf = new byte[size]; ulong fileFP = fp; using (Stream ifs = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read, size, FileOptions.Asynchronous | FileOptions.SequentialScan)) { IAsyncResult readResult = ifs.BeginRead(readBuf, 0, readBuf.Length, null, null); while (true) { int bytesRead = ifs.EndRead(readResult); if (bytesRead == 0) break; byte[] tmpBuf = fpBuf; fpBuf = readBuf; readBuf = tmpBuf; readResult = ifs.BeginRead(readBuf, 0, readBuf.Length, null, null); fileFP = this.Extend(fileFP, fpBuf, 0, bytesRead); } } return fileFP; } } }