Dryad/LinqToDryad/DryadLinqUtil.cs

633 lines
22 KiB
C#

/*
Copyright (c) Microsoft Corporation
All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0
THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.
See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.
*/
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Formatters.Binary;
using System.Security.Cryptography;
using System.Text.RegularExpressions;
using System.Reflection;
using System.Linq;
using System.Diagnostics;
using Microsoft.CSharp;
using Microsoft.Research.DryadLinq.Internal;
namespace Microsoft.Research.DryadLinq
{
// This class contains some useful utility functions.
internal static class DryadLinqUtil
{
private static Regex s_CSharpIdRegex = new Regex(@"[^\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Nd}\p{Nl}\p{Mn}\p{Mc}\p{Cf}\p{Pc}\p{Lm}]");
private static CSharpCodeProvider s_CSharpCodeProvider = new CSharpCodeProvider();
// Check if name is a valid identifier.
internal static bool IsValidId(string name)
{
return s_CSharpCodeProvider.IsValidIdentifier(name);
}
// Make name to be a valid identifier.
internal static string MakeValidId(string name)
{
return s_CSharpIdRegex.Replace(name, "_");
}
// Note: this is used in various places, including vertex-code for naming files.
internal static string MakeUniqueName()
{
return System.Guid.NewGuid().ToString();
}
internal static int GetTaskIndex(int hcode, int count)
{
int x1 = hcode & 0xFF;
int x2 = (hcode >> 8) & 0xFF;
int x3 = (hcode >> 16) & 0xFFFF;
return (x1 ^ x2 ^ x3) % count;
}
// Check if the array is ordered.
internal static bool IsOrdered<T>(T[] array, IComparer<T> comparer, bool isDescending)
{
comparer = TypeSystem.GetComparer<T>(comparer);
if (array.Length < 2) return true;
T elem = array[0];
for (int i = 1; i < array.Length; i++)
{
int cmp = comparer.Compare(elem, array[i]);
int cmpRes = (isDescending) ? -cmp : cmp;
if (cmpRes > 0) return false;
elem = array[i];
}
return true;
}
/// <summary>
/// Binary search. The array must be ordered. Assume that comparer != null
/// The input is an array {a_i} of n items and a value to search for.
/// The output is a value in the range [0..n].
/// The output can be directly used as the port-number to emit the item to for range-distribution.
/// </summary>
/// <typeparam name="T">The type of the elements in the array.</typeparam>
/// <param name="array">The elements.</param>
/// <param name="value">The value to be found.</param>
/// <param name="comparer">A comparer to compare the elements of type T.</param>
/// <param name="isDescending">Whether the array is ordered descending (true) or ascending (false).</param>
/// <returns>The left-most value that is greaterthan-or-equal to value (lessthan-or-equal for descending).</returns>
// retVal is the idx of the item which satisfies
//
// Ascending: arr[idx-1] < x <= arr[idx] arr[-1] = -inf arr[n] = +inf
// idx is the index of the left-most element which is greater-or-equal than x
// The smallest retVal is 0
// The largest retVal is n
//
// Descending: arr[idx-1] > x >= arr[idx] arr[-1] = +inf arr[n] = -inf
// idx is the index of the left-most element which is less-or-equal than x.
// The smallest retVal is 0
// The largest retVal is n
//
// Although this isn't a strictly 'symmetrical rule', the "always left-most" is simple and consistent.
internal static int BinarySearch<T>(T[] array, T value, IComparer<T> comparer, bool isDescending)
{
int lo = 0;
int hi = array.Length - 1;
while (lo <= hi)
{
int i = lo + ((hi - lo) >> 1);
int cmp = comparer.Compare(array[i], value);
if (cmp == 0){
// ensure we are on the left-most matching item.
// Note: linear-search isn't ideal if there are many equal values, but that should not be common.
while (i > 0 && comparer.Compare(array[i-1], value) == 0)
i--;
}
cmp = (isDescending) ? -cmp : cmp;
if (cmp < 0)
{
lo = i + 1;
}
else
{
hi = i - 1;
}
}
return lo;
}
/// <summary>
/// Mergesort a list of sorted datasets.
/// </summary>
/// <typeparam name="TSource">The type of the records of the input datasets</typeparam>
/// <typeparam name="TKey">The type of the keys to sort</typeparam>
/// <param name="sources">An array of input datasets</param>
/// <param name="keySelector">The key extraction function</param>
/// <param name="comparer">A Comparer on TKey to compare keys</param>
/// <param name="isDescending">True if the mergesort is descending</param>
/// <returns>The mergesort result</returns>
internal static IEnumerable<TSource>
MergeSort<TSource, TKey>(IEnumerable<TSource>[] sources,
Func<TSource, TKey> keySelector,
IComparer<TKey> comparer,
bool isDescending)
{
comparer = TypeSystem.GetComparer<TKey>(comparer);
IEnumerable<TSource>[] currentLayer = sources;
int currentLayerCount = sources.Length;
IEnumerable<TSource>[] nextLayer = new IEnumerable<TSource>[currentLayerCount / 2 + 1];
while (currentLayerCount != 1)
{
int nextLayerCount = currentLayerCount / 2;
int idx = 0;
for (int i = 0; i < nextLayerCount; i++)
{
nextLayer[i] = BinaryMergeSort<TSource, TKey>(currentLayer[idx],
currentLayer[idx + 1],
keySelector,
comparer,
isDescending);
idx += 2;
}
if (idx < currentLayerCount)
{
nextLayer[nextLayerCount] = currentLayer[idx];
nextLayerCount++;
}
currentLayer = nextLayer;
currentLayerCount = nextLayerCount;
}
return currentLayer[0];
}
/// <summary>
/// Mergesort two input sorted datasets.
/// </summary>
/// <typeparam name="TSource">The type of the records of the datasets</typeparam>
/// <typeparam name="TKey">The type of the keys to sort</typeparam>
/// <param name="source1">The first input dataset</param>
/// <param name="source2">The second input dataset</param>
/// <param name="keySelector">The key extraction function</param>
/// <param name="comparer">A Comparer on TKey to compare keys</param>
/// <param name="isDescending">True if the sort is descending</param>
/// <returns>The mergesort result</returns>
internal static IEnumerable<TSource>
BinaryMergeSort<TSource, TKey>(IEnumerable<TSource> source1,
IEnumerable<TSource> source2,
Func<TSource, TKey> keySelector,
IComparer<TKey> comparer,
bool isDescending)
{
comparer = TypeSystem.GetComparer<TKey>(comparer);
IEnumerator<TSource> leftElems = source1.GetEnumerator();
IEnumerator<TSource> rightElems = source2.GetEnumerator();
if (leftElems.MoveNext())
{
if (rightElems.MoveNext())
{
TKey leftKey = keySelector(leftElems.Current);
TKey rightKey = keySelector(rightElems.Current);
while (true)
{
int cmp = comparer.Compare(leftKey, rightKey);
int cmpRes = (isDescending) ? -cmp : cmp;
if (cmpRes > 0)
{
yield return rightElems.Current;
if (!rightElems.MoveNext())
{
yield return leftElems.Current;
break;
}
rightKey = keySelector(rightElems.Current);
}
else
{
yield return leftElems.Current;
if (!leftElems.MoveNext())
{
yield return rightElems.Current;
leftElems = rightElems;
break;
}
leftKey = keySelector(leftElems.Current);
}
}
}
}
else
{
leftElems = rightElems;
}
while (leftElems.MoveNext())
{
yield return leftElems.Current;
}
}
// Swap the bytes
internal static UInt64 ByteSwap(UInt64 x)
{
return (x << 56)
| (x >> 56)
| ((x & 0x0000ff00UL) << 40)
| ((x >> 40) & 0x0000ff00UL)
| ((x & 0x00ff0000UL) << 24)
| ((x >> 24) & 0x00ff0000UL)
| ((x & 0xff000000UL) << 8)
| ((x >> 8) & 0xff000000UL);
}
private static byte[] ObjectToByteArray(Object objectToSerialize)
{
MemoryStream fs = new MemoryStream();
BinaryFormatter formatter = new BinaryFormatter();
try
{
formatter.Serialize(fs, objectToSerialize);
return fs.ToArray();
}
catch (SerializationException se)
{
DryadLinqClientLog.Add("Error occured during serialization. Message: " + se.Message);
throw;
}
finally
{
fs.Close();
}
}
private static Object ByteArrayToObject(byte[] rep)
{
MemoryStream fs = new MemoryStream(rep);
BinaryFormatter bfm = new BinaryFormatter();
try
{
Object o = bfm.Deserialize(fs);
return o;
}
catch (SerializationException e)
{
throw new DryadLinqException(DryadLinqErrorCode.FailedToDeserialize, SR.FailedToDeserialize, e);
}
finally
{
fs.Close();
}
}
internal static string MD5(Object ob)
{
byte[] payload = ObjectToByteArray(ob);
MD5 md5 = new MD5CryptoServiceProvider();
byte[] result = md5.ComputeHash(payload);
StringBuilder sb = new StringBuilder();
for (int i = 0; i < result.Length; i++)
{
sb.Append(result[i].ToString("X2"));
}
return sb.ToString();
}
internal static string ReplaceWithLast(Match m)
{
return m.Groups[1].Value;
}
// matches each simple type name in a nested template type.
private static Regex dotted = new Regex(@"[^<>,]*\.([^\.<>,]*)", RegexOptions.Compiled);
internal static string SimpleName(string name)
{
// the typename may contain nested templates; simplify every component to keep the text after the last dot
string result = dotted.Replace(name, ReplaceWithLast);
return result;
}
internal static string MapToString<T>(Dictionary<T, T[]> map)
{
StringBuilder sb = new StringBuilder();
sb.Append('[');
bool isFirst = true;
foreach (KeyValuePair<T, T[]> kv in map)
{
if (isFirst)
{
isFirst = false;
}
else
{
sb.AppendLine(",");
sb.Append(' ');
}
sb.Append(kv.Key);
sb.Append(" -> <");
bool isFirst1 = true;
foreach (T v in kv.Value)
{
if (isFirst1)
{
isFirst1 = false;
}
sb.Append(v);
}
sb.Append('>');
}
sb.Append(']');
return sb.ToString();
}
// Unsafe memcpy from System.Buffer
internal unsafe static void memcpy(byte* src, byte* dest, int len)
{
if (len < 0)
{
throw new ArgumentException("len < 0", "len");
}
#if FEATURE_PAL
// Portable naive implementation
while (len-- > 0)
*dest++ = *src++;
#else
#if IA64
// IA64 implementation
long dstAlign = 8 - (((long)dest) & 7); // number of bytes to copy before dest is 8-byte aligned
while ((dstAlign > 0) && (len > 0))
{
*dest++ = *src++;
len--;
dstAlign--;
}
long srcAlign = 8 - (((long)src) & 7);
if (len > 0)
{
if (srcAlign != 8)
{
if (4 == srcAlign)
{
while (len >= 4)
{
((int*)dest)[0] = ((int*)src)[0];
dest += 4;
src += 4;
len -= 4;
}
srcAlign = 2; // fall through to 2-byte copies
}
if ((2 == srcAlign) || (6 == srcAlign))
{
while (len >= 2)
{
((short*)dest)[0] = ((short*)src)[0];
dest += 2;
src += 2;
len -= 2;
}
}
while (len-- > 0)
{
*dest++ = *src++;
}
}
else
{
if (len >= 16)
{
do
{
((long*)dest)[0] = ((long*)src)[0];
((long*)dest)[1] = ((long*)src)[1];
dest += 16;
src += 16;
} while ((len -= 16) >= 16);
}
if (len > 0) // protection against negative len and optimization for len==16*N
{
if ((len & 8) != 0)
{
((long*)dest)[0] = ((long*)src)[0];
dest += 8;
src += 8;
}
if ((len & 4) != 0)
{
((int*)dest)[0] = ((int*)src)[0];
dest += 4;
src += 4;
}
if ((len & 2) != 0)
{
((short*)dest)[0] = ((short*)src)[0];
dest += 2;
src += 2;
}
if ((len & 1) != 0)
{
*dest++ = *src++;
}
}
}
}
#else
// AMD64 implementation uses longs instead of ints where possible
if (len >= 16)
{
do
{
#if AMD64
((long*)dest)[0] = ((long*)src)[0];
((long*)dest)[1] = ((long*)src)[1];
#else
((int*)dest)[0] = ((int*)src)[0];
((int*)dest)[1] = ((int*)src)[1];
((int*)dest)[2] = ((int*)src)[2];
((int*)dest)[3] = ((int*)src)[3];
#endif
dest += 16;
src += 16;
} while ((len -= 16) >= 16);
}
if (len > 0) // protection against negative len and optimization for len==16*N
{
if ((len & 8) != 0)
{
#if AMD64
((long*)dest)[0] = ((long*)src)[0];
#else
((int*)dest)[0] = ((int*)src)[0];
((int*)dest)[1] = ((int*)src)[1];
#endif
dest += 8;
src += 8;
}
if ((len & 4) != 0)
{
((int*)dest)[0] = ((int*)src)[0];
dest += 4;
src += 4;
}
if ((len & 2) != 0)
{
((short*)dest)[0] = ((short*)src)[0];
dest += 2;
src += 2;
}
if ((len & 1) != 0)
*dest++ = *src++;
}
#endif // IA64
#endif // FEATURE_PAL
}
//Utility function to determine if a sequence of partition-keys is ascending or descending consistently
// if the sequence is neither ascending or descending, an exception is thrown.
// if the sequence is single element or a series of equal values, the return is null == inconclusive/both.
// if the return value == false, the value of isDescending is meaningless.
internal static bool ComputeIsDescending<TKey>(TKey[] partitionKeys, IComparer<TKey> comparer, out bool? isDescending)
{
if (partitionKeys.Length == 0 || partitionKeys.Length == 1)
{
isDescending = null; // neither specifically ascending nor descending.
return true; // everything is OK.
}
// Determine if the keys are ascending/descending and whether they are consistent
isDescending = null; // initially we don't know (and equal keys may delay identification)
TKey curr = partitionKeys[0];
for (int i = 1; i < partitionKeys.Length; i++)
{
int cmp = comparer.Compare(curr, partitionKeys[i]);
if (cmp == 0)
{
// do nothing
}
if (cmp < 0)
{
if (isDescending == null)
{
isDescending = false; // the sequence appears to be ascending
}
if (isDescending == true)
{
return false; // there was an inconsistency.
}
}
if (cmp > 0)
{
if (isDescending == null)
{
isDescending = true; // the sequence appears to be descending
}
if (isDescending == false)
{
return false; // there was an inconsistency.
}
}
curr = partitionKeys[i];
}
return true;
}
}
internal class FList<T>
{
public T elem;
public FList<T> next;
internal static FList<T> Empty = new FList<T>(default(T), null);
internal FList(T elem, FList<T> next)
{
this.elem = elem;
this.next = next;
}
internal FList<T> Cons(T elem, FList<T> next)
{
return new FList<T>(elem, next);
}
internal bool Find(T x)
{
FList<T> curr = this;
while (curr != Empty)
{
if (curr.elem.Equals(x)) return true;
curr = curr.next;
}
return false;
}
public override string ToString()
{
StringBuilder sb = new StringBuilder();
sb.Append("< ");
if (this != Empty)
{
sb.Append(this.elem);
FList<T> curr = this.next;
while (curr != Empty)
{
sb.Append(", " + curr.elem.ToString());
curr = curr.next;
}
sb.Append(" >");
}
return sb.ToString();
}
}
internal class Wrapper<T>
{
internal T item;
internal Wrapper(T item)
{
this.item = item;
}
}
}