/*
Copyright (c) Microsoft Corporation
All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0
THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.
See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.
*/
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Globalization;
using System.Reflection;
using System.Linq.Expressions;
using System.Linq;
#pragma warning disable 1591
namespace Microsoft.Research.DryadLinq.Internal
{
/// <summary>
/// DryadLINQ helper functions. They are mainly used in the implementation of DryadLINQ.
/// </summary>
/// <remarks>A DryadLINQ user should not need to use DryadLinqHelper directly.</remarks>
public static class DryadLinqHelper
{
/// <summary>
/// Check if an input source is sorted.
/// </summary>
/// <typeparam name="TSource">The type of input record.</typeparam>
/// <typeparam name="TKey">The type of key.</typeparam>
/// <param name="source">The input source.</param>
/// <param name="keySelector">The key selection function.</param>
/// <param name="comparer">A comparer object used to compare keys.</param>
/// <param name="isDescending">True if the check is for descending order.</param>
/// <returns>
/// The records of <paramref name="source"/>, unchanged and in their original order.
/// </returns>
/// <remarks>
/// Each record is yielded before its key is compared with the previous key, so an
/// out-of-order record is still handed to the consumer; the exception is raised when
/// the consumer asks for the record after it.
/// </remarks>
/// <exception cref="DryadLinqException">
/// Thrown during enumeration when two adjacent keys violate the requested order.
/// </exception>
[Resource(IsStateful = false)]
public static IEnumerable<TSource>
    CheckSort<TSource, TKey>(IEnumerable<TSource> source,
                             Expression<Func<TSource, TKey>> keySelector,
                             IComparer<TKey> comparer,
                             bool isDescending)
{
    Func<TSource, TKey> keySel = keySelector.Compile();
    comparer = TypeSystem.GetComparer(comparer);

    // Dispose the enumerator even when the consumer stops early or the order check throws.
    using (IEnumerator<TSource> elems = source.GetEnumerator())
    {
        if (elems.MoveNext())
        {
            TSource curElem = elems.Current;
            yield return curElem;
            TKey curKey = keySel(curElem);
            while (elems.MoveNext())
            {
                TSource nextElem = elems.Current;
                yield return nextElem;
                TKey nextKey = keySel(nextElem);
                int cmp = comparer.Compare(curKey, nextKey);
                // For a descending check the comparison result is negated so that
                // "previous > next" is the only failing case in both directions.
                int cmpRes = (isDescending) ? -cmp : cmp;
                if (cmpRes > 0)
                {
                    throw new DryadLinqException(SR.SourceNotOrdered);
                }
                curKey = nextKey;
            }
        }
    }
}
/// <summary>
/// Apply a function to the cross product of two input sequences.
/// </summary>
/// <typeparam name="T1">The record type of the first input.</typeparam>
/// <typeparam name="T2">The record type of the second input.</typeparam>
/// <typeparam name="T3">The record type of the result.</typeparam>
/// <param name="s1">The first input.</param>
/// <param name="s2">The second input.</param>
/// <param name="procFunc">The function to apply.</param>
/// <returns>The result of applying the function to the cross product of two inputs.</returns>
/// <remarks>
/// One input is buffered into a list and the other is streamed. The second input is
/// buffered by default; when both inputs are vertex readers that report non-negative
/// total lengths, the one reporting the smaller length is buffered instead. The order
/// of the results follows the streamed input (outer loop) and then the buffered input.
/// </remarks>
public static IEnumerable<T3> Cross<T1, T2, T3>(IEnumerable<T1> s1,
                                                IEnumerable<T2> s2,
                                                Expression<Func<T1, T2, T3>> procFunc)
{
    Func<T1, T2, T3> proc = procFunc.Compile();
    bool useRight = true;
    if ((s1 is DryadLinqVertexReader<T1>) && (s2 is DryadLinqVertexReader<T2>))
    {
        Int64 leftLen = ((DryadLinqVertexReader<T1>)s1).GetTotalLength();
        Int64 rightLen = ((DryadLinqVertexReader<T2>)s2).GetTotalLength();
        // A negative length is treated as "unknown": keep the default choice.
        if (leftLen >= 0 && rightLen >= 0)
        {
            useRight = rightLen <= leftLen;
        }
    }
    if (useRight)
    {
        List<T2> elems2 = s2.ToList();
        foreach (var elem1 in s1)
        {
            foreach (var elem2 in elems2)
            {
                yield return proc(elem1, elem2);
            }
        }
    }
    else
    {
        List<T1> elems1 = s1.ToList();
        foreach (var elem2 in s2)
        {
            foreach (var elem1 in elems1)
            {
                yield return proc(elem1, elem2);
            }
        }
    }
}
/// <summary>
/// Determines whether two sequences are equal according to an equality comparer.
/// </summary>
/// <typeparam name="T">The record type of the sequences.</typeparam>
/// <param name="s1">The first sequence.</param>
/// <param name="s2">The second sequence.</param>
/// <param name="comparer">An equality comparer.</param>
/// <returns>
/// The result of <c>Enumerable.SequenceEqual</c> (true iff the two sequences are equal),
/// wrapped by <c>DryadLinqVertex.AsEnumerable</c>.
/// </returns>
// NOTE(review): the return type assumes DryadLinqVertex.AsEnumerable(bool) yields
// IEnumerable<bool> -- confirm against DryadLinqVertex.
public static IEnumerable<bool> SequenceEqual<T>(IEnumerable<T> s1,
                                                 IEnumerable<T> s2,
                                                 IEqualityComparer<T> comparer)
{
    return DryadLinqVertex.AsEnumerable(System.Linq.Enumerable.SequenceEqual(s1, s2, comparer));
}
// Used in SlidingWindow()
/// <summary>
/// Returns the last windowSize-1 records in a sequence.
/// </summary>
/// <typeparam name="T">The record type of the sequence.</typeparam>
/// <param name="source">The input sequence.</param>
/// <param name="windowSize">The window size. Must be at least 1.</param>
/// <returns>
/// A sequence holding exactly one item: the last windowSize-1 records as an array,
/// in their original order.
/// </returns>
/// <remarks>Used in sliding windows computations.</remarks>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown during enumeration when <paramref name="windowSize"/> is less than 1.
/// </exception>
/// <exception cref="DryadLinqException">
/// Thrown during enumeration when the source has fewer than windowSize-1 records.
/// </exception>
[Resource(IsStateful = false)]
public static IEnumerable<T[]> Last<T>(IEnumerable<T> source,
                                       int windowSize)
{
    if (windowSize < 1)
    {
        throw new ArgumentOutOfRangeException("windowSize");
    }

    int count = windowSize - 1;
    if (count == 0)
    {
        // A window of one record needs nothing carried over; returning here also
        // avoids the modulo-by-zero below.
        yield return new T[0];
        yield break;
    }

    // Ring buffer that always holds the most recent `count` records.
    T[] buffer = new T[count];
    long total = 0;
    foreach (var x in source)
    {
        buffer[total % count] = x;
        total++;
    }
    if (total < count)
    {
        throw new DryadLinqException(String.Format(SR.PartitionTooSmallForSlidingWindow, count));
    }

    // Take the remainder in 64-bit arithmetic first; casting `total` to int before
    // the remainder would truncate it for sources longer than int.MaxValue.
    T[] last = new T[count];
    int startIdx = (int)(total % count);
    // Unroll the ring: the oldest retained record sits at startIdx.
    Array.Copy(buffer, startIdx, last, 0, count - startIdx);
    Array.Copy(buffer, 0, last, count - startIdx, startIdx);
    yield return last;
}
/// <summary>
/// Shifts a sequence of arrays one position to the right and tags each item with its
/// new position.
/// </summary>
/// <typeparam name="T">The element type of the arrays.</typeparam>
/// <param name="source">The input sequence of arrays.</param>
/// <returns>
/// Nothing for an empty source. Otherwise (0, empty array) followed by
/// (i, the array at position i-1 of the source) for every later position i. The last
/// array of the source is never emitted.
/// </returns>
public static IEnumerable<IndexedValue<T[]>> Slide<T>(IEnumerable<T[]> source)
{
    using (IEnumerator<T[]> sourceEnum = source.GetEnumerator())
    {
        if (sourceEnum.MoveNext())
        {
            // Position 0 has no predecessor, so it receives an empty array.
            yield return new IndexedValue<T[]>(0, new T[0]);
            int index = 1;
            T[] lastVal = sourceEnum.Current;
            while (sourceEnum.MoveNext())
            {
                yield return new IndexedValue<T[]>(index, lastVal);
                index++;
                lastVal = sourceEnum.Current;
            }
        }
    }
}
/// <summary>
/// Applies a function to every full window of <paramref name="windowSize"/> consecutive
/// records, where the window is first seeded with carried-over records.
/// </summary>
/// <typeparam name="T1">The record type of the input.</typeparam>
/// <typeparam name="T2">The result type of the window function.</typeparam>
/// <param name="source1">
/// A sequence holding exactly one item whose Value is the array of records used to
/// seed the window.
/// </param>
/// <param name="source2">The records to slide the window over.</param>
/// <param name="procFunc">The function applied to each full window.</param>
/// <param name="windowSize">The number of records in a window.</param>
/// <returns>
/// One result per full window. Nothing is produced if the seed plus
/// <paramref name="source2"/> never fill a window.
/// </returns>
/// <remarks>
/// The same window object is passed to <paramref name="procFunc"/> on every call and is
/// modified between calls, so a result must not keep reading it after the call returns.
/// </remarks>
[Resource(IsStateful = false)]
public static IEnumerable<T2>
    ProcessWindows<T1, T2>(IEnumerable<IndexedValue<T1[]>> source1,
                           IEnumerable<T1> source2,
                           Func<IEnumerable<T1>, T2> procFunc,
                           Int32 windowSize)
{
    Window<T1> window = new Window<T1>(windowSize);
    T1[] slided = source1.Single().Value;
    for (int i = 0; i < slided.Length; i++)
    {
        window.Add(slided[i]);
    }
    using (IEnumerator<T1> sourceEnum = source2.GetEnumerator())
    {
        // Top the window up to its full size before producing the first result.
        while (window.Count() < windowSize)
        {
            if (!sourceEnum.MoveNext()) break;
            window.Add(sourceEnum.Current);
        }
        if (window.Count() == windowSize)
        {
            yield return procFunc(window);
            // Each further record pushes the oldest one out of the window.
            while (sourceEnum.MoveNext())
            {
                window.Add(sourceEnum.Current);
                yield return procFunc(window);
            }
        }
    }
}
// Calculate the sizes of the partitions. Used for example to implement Concat.
/// <summary>
/// Materializes the input into an array and emits one item per input record, each
/// carrying its position and the complete array.
/// </summary>
/// <typeparam name="T">The record type of the input.</typeparam>
/// <param name="source">The input sequence.</param>
/// <returns>
/// For every position i of the input, the pair (i, array of all input records). All
/// items share the same array instance.
/// </returns>
public static IEnumerable<IndexedValue<T[]>> IndexedCount<T>(IEnumerable<T> source)
{
    T[] elems = source.ToArray();
    for (int i = 0; i < elems.Length; i++)
    {
        yield return new IndexedValue<T[]>(i, elems);
    }
}
/// <summary>
/// Tags every record of a partition with the index of the output partition it belongs
/// to, spreading records over <paramref name="pcount"/> output partitions.
/// </summary>
/// <typeparam name="T">The record type.</typeparam>
/// <param name="source1">
/// A sequence holding exactly one item: Index is the index of this input partition and
/// Value is the array of record counts of all input partitions.
/// </param>
/// <param name="source2">The records of this input partition.</param>
/// <param name="pcount">The number of output partitions. Must be at least 1.</param>
/// <returns>Each record of <paramref name="source2"/> paired with its output partition index.</returns>
/// <remarks>
/// Every output partition except the last receives (total count / pcount) records, with
/// a minimum of one; the last output partition also receives any remainder.
/// </remarks>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown during enumeration when <paramref name="pcount"/> is less than 1.
/// </exception>
[Resource(IsStateful = false)]
public static IEnumerable<IndexedValue<T>>
    AddPartitionIndex<T>(IEnumerable<IndexedValue<long[]>> source1, IEnumerable<T> source2, Int32 pcount)
{
    if (pcount < 1)
    {
        throw new ArgumentOutOfRangeException("pcount");
    }

    IndexedValue<long[]> s1 = source1.Single();

    // With fewer records than output partitions the integer quotient is 0; use 1 so
    // the divisions below are well defined.
    long averageCount = Math.Max(1L, s1.Value.Sum() / pcount);

    // Number of records held by the input partitions that precede this one.
    long partialCount = 0;
    for (int i = 0; i < s1.Index; i++)
    {
        partialCount += s1.Value[i];
    }

    // Records past averageCount * pcount are the remainder and belong to the last
    // output partition, so the starting index is capped at pcount - 1.
    long startPart = partialCount / averageCount;
    int partIndex;
    long indexInPart;
    if (startPart >= pcount)
    {
        partIndex = pcount - 1;
        indexInPart = 0;    // never consulted again once in the last partition
    }
    else
    {
        partIndex = (int)startPart;
        indexInPart = partialCount % averageCount;
    }

    foreach (T elem in source2)
    {
        if (indexInPart >= averageCount && partIndex != pcount - 1)
        {
            partIndex++;
            indexInPart = 0;
        }
        yield return new IndexedValue<T>(partIndex, elem);
        indexInPart++;
    }
}
// Emits a single placeholder record (the integer 0) regardless of what the input
// contains; the input is never enumerated. Used for example to implement Reverse().
// NOTE(review): the signature reads as non-generic here; other members of this class
// appear to have lost generic type arguments -- confirm the intended declaration.
[Resource(IsStateful = false)]
public static IEnumerable ValueZero(IEnumerable source)
{
    const int dummy = 0;
    yield return dummy;
}
// Used for Reverse()
// input: a sequence of n dummy items. eg {0,0,0... } x n
// output: { (0,n), (1,n), (2,n), .., (n-1, n) }
//   item.Index = index
//   item.Value = nPartitions
// NOTE(review): the element type of the input is assumed to be int, matching the dummy
// zeros described above -- confirm against the callers.
/// <summary>
/// Counts the input records and emits one (index, count) pair per record.
/// </summary>
/// <param name="source">The input sequence; only its length is used.</param>
/// <returns>The pairs (0, n), (1, n), ..., (n-1, n), where n is the length of the input.</returns>
public static IEnumerable<IndexedValue<int>> MakeIndexCountPairs(IEnumerable<int> source)
{
    int count = source.Count();
    for (int i = 0; i < count; i++)
    {
        yield return new IndexedValue<int>(i, count);
    }
}
// Used for Reverse()
// receives a pair (myIndex, nPartitions) as source1, and a normal sequence as source2.
// targetIdx = nPartition-myIndex-1
// produces {(targetIdx, item), (targetIdx, item), ...}
/// <summary>
/// Tags every record of a partition with the index of the mirrored partition.
/// </summary>
/// <typeparam name="T">The record type.</typeparam>
/// <param name="source1">
/// A sequence holding exactly one item: Index is this partition's index and Value is
/// the total number of partitions.
/// </param>
/// <param name="source2">The records of this partition.</param>
/// <returns>
/// Each record of <paramref name="source2"/>, in its original order, paired with
/// (partition count - this index - 1).
/// </returns>
public static IEnumerable<IndexedValue<T>>
    AddIndexForReverse<T>(IEnumerable<IndexedValue<int>> source1, IEnumerable<T> source2)
{
    IndexedValue<int> item = source1.Single();
    int myIndex = item.Index;
    int pcount = item.Value;
    int targetIndex = pcount - myIndex - 1;
    foreach (T elem in source2)
    {
        yield return new IndexedValue<T>(targetIndex, elem);
    }
}
// Used in Zip()
/// <summary>
/// Materializes two sequences of counts and emits, for every position of the second
/// one, that position together with both complete arrays.
/// </summary>
/// <param name="source1">The first sequence of counts.</param>
/// <param name="source2">The second sequence of counts.</param>
/// <returns>
/// For every position i of <paramref name="source2"/>, the item
/// (i, (counts of source1, counts of source2)). All items share the same pair instance.
/// </returns>
public static IEnumerable<IndexedValue<Pair<long[], long[]>>>
    ZipCount(IEnumerable<long> source1, IEnumerable<long> source2)
{
    long[] elems1 = source1.ToArray();
    long[] elems2 = source2.ToArray();
    Pair<long[], long[]> pair = new Pair<long[], long[]>(elems1, elems2);
    // One item per entry of the second array.
    for (int i = 0; i < elems2.Length; i++)
    {
        yield return new IndexedValue<Pair<long[], long[]>>(i, pair);
    }
}
/// <summary>
/// Tags every record of a partition with the index of the partition, in a second
/// partitioning, that holds the record at the same global position.
/// </summary>
/// <typeparam name="T">The record type.</typeparam>
/// <param name="source1">
/// A sequence holding exactly one item: Index is this partition's index; Value.Key is
/// the array of partition sizes of the target partitioning and Value.Value is the array
/// of partition sizes of the partitioning that <paramref name="source2"/> belongs to.
/// </param>
/// <param name="source2">The records of this partition.</param>
/// <returns>
/// Records of <paramref name="source2"/> paired with a target partition index.
/// Enumeration stops early, dropping the remaining records, once the target
/// partitioning has no more room.
/// </returns>
public static IEnumerable<IndexedValue<T>>
    AssignPartitionIndex<T>(IEnumerable<IndexedValue<Pair<long[], long[]>>> source1,
                            IEnumerable<T> source2)
{
    IndexedValue<Pair<long[], long[]>> s1 = source1.Single();
    long[] elems1 = s1.Value.Key;
    long[] elems2 = s1.Value.Value;

    // Global position of the first record of this partition.
    long partialCount = 0;
    for (int i = 0; i < s1.Index; i++)
    {
        partialCount += elems2[i];
    }

    // Walk the target partitions until the one containing that position is found.
    // On exit a negative partialCount is minus the number of free slots left in it.
    int partIndex = 0;
    for (partIndex = 0; partIndex < elems1.Length; partIndex++)
    {
        partialCount -= elems1[partIndex];
        if (partialCount < 0) break;
    }
    if (partialCount < 0)
    {
        foreach (T elem in source2)
        {
            yield return new IndexedValue<T>(partIndex, elem);
            partialCount++;
            if (partialCount == 0)
            {
                // Current target partition is full: advance to the next non-empty one.
                for (partIndex = partIndex + 1; partIndex < elems1.Length; partIndex++)
                {
                    partialCount = -elems1[partIndex];
                    if (partialCount < 0) break;
                }
                // No target partition with room is left.
                if (partialCount == 0) break;
            }
        }
    }
}
// Used in SelectWithPartitionIndex()
// Yields 0, 1, 2, ... -- one running position per input record. The records themselves
// are only read as int and otherwise ignored.
public static IEnumerable AssignIndex(IEnumerable source)
{
    int position = 0;
    foreach (int ignored in source)
    {
        yield return position++;
    }
}
/// <summary>
/// Applies a function to a whole partition together with that partition's index.
/// </summary>
/// <typeparam name="T1">The record type of the input.</typeparam>
/// <typeparam name="T2">The record type of the result.</typeparam>
/// <param name="source1">The records of the partition.</param>
/// <param name="source2">A sequence holding exactly one item: the partition index.</param>
/// <param name="procFunc">The function applied to the records and the index.</param>
/// <returns>Whatever <paramref name="procFunc"/> returns.</returns>
public static IEnumerable<T2>
    ApplyWithPartitionIndex<T1, T2>(IEnumerable<T1> source1,
                                    IEnumerable<int> source2,
                                    Func<IEnumerable<T1>, int, IEnumerable<T2>> procFunc)
{
    int index = source2.Single();
    return procFunc(source1, index);
}
/// <summary>
/// Honors a debugging request made through the DRYADLINQ_DEBUGVERTEX environment
/// variable. Does nothing when the variable is not set. The value "LAUNCH" (compared
/// case-insensitively) launches a debugger; any other value logs a message, polls once
/// per second until a debugger is attached, and then breaks into it.
/// </summary>
public static void CheckVertexDebugRequest()
{
    string request = Environment.GetEnvironmentVariable("DRYADLINQ_DEBUGVERTEX");
    if (request == null)
    {
        return;
    }

    if (String.Equals(request, "LAUNCH", StringComparison.OrdinalIgnoreCase))
    {
        System.Diagnostics.Debugger.Launch();
        return;
    }

    DryadLinqLog.AddInfo("Waiting for debugger to attach...");
    while (!System.Diagnostics.Debugger.IsAttached)
    {
        System.Threading.Thread.Sleep(1000);
    }
    System.Diagnostics.Debugger.Break();
}
}
/// <summary>
/// A fixed-capacity buffer that keeps the most recently added elements. Once the
/// buffer is full, adding an element overwrites the oldest one. Enumeration runs from
/// the oldest retained element to the newest.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
internal class Window<T> : IEnumerable<T>
{
    private T[] m_elems;        // backing storage, used as a ring
    private int m_startIdx;     // position of the oldest retained element
    private int m_count;        // number of retained elements

    /// <summary>
    /// Creates an empty window.
    /// </summary>
    /// <param name="len">The maximum number of elements the window retains.</param>
    public Window(int len)
    {
        this.m_elems = new T[len];
        this.m_startIdx = 0;
        this.m_count = 0;
    }

    /// <summary>
    /// Appends an element, dropping the oldest one if the window is already full.
    /// </summary>
    /// <param name="elem">The element to append.</param>
    public void Add(T elem)
    {
        // Slot just past the newest element, wrapped around the end of the array.
        int nextIdx = this.m_startIdx + this.m_count;
        if (nextIdx >= this.m_elems.Length)
        {
            nextIdx -= this.m_elems.Length;
        }
        this.m_elems[nextIdx] = elem;
        if (this.m_count < this.m_elems.Length)
        {
            this.m_count++;
        }
        else
        {
            // Full: the slot just written held the oldest element, so the start moves on.
            this.m_startIdx++;
            if (this.m_startIdx == this.m_elems.Length)
            {
                this.m_startIdx = 0;
            }
        }
    }

    /// <summary>
    /// Returns the number of elements currently retained.
    /// </summary>
    public int Count()
    {
        return this.m_count;
    }

    #region IEnumerable and IEnumerable<T> members
    IEnumerator IEnumerable.GetEnumerator()
    {
        return this.GetEnumerator();
    }

    /// <summary>
    /// Enumerates the retained elements from oldest to newest.
    /// </summary>
    public IEnumerator<T> GetEnumerator()
    {
        int idx = this.m_startIdx;
        for (int i = 0; i < this.m_count; i++)
        {
            yield return this.m_elems[idx];
            idx++;
            if (idx == this.m_elems.Length) idx = 0;
        }
    }
    #endregion
}
}