417 lines
14 KiB
C#
417 lines
14 KiB
C#
/*
|
||
Copyright (c) Microsoft Corporation
|
||
|
||
All rights reserved.
|
||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
|
||
compliance with the License. You may obtain a copy of the License
|
||
at http://www.apache.org/licenses/LICENSE-2.0
|
||
|
||
|
||
THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
|
||
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
|
||
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.
|
||
|
||
|
||
See the Apache Version 2.0 License for specific language governing permissions and
|
||
limitations under the License.
|
||
|
||
*/
|
||
|
||
//
|
||
// (c) Microsoft Corporation. All rights reserved.
|
||
//
|
||
using System;
|
||
using System.Collections;
|
||
using System.Collections.Generic;
|
||
using System.IO;
|
||
using System.Globalization;
|
||
using System.Reflection;
|
||
using System.Linq.Expressions;
|
||
using System.Linq;
|
||
|
||
namespace Microsoft.Research.DryadLinq.Internal
|
||
{
|
||
public static class HpcLinqHelper
|
||
{
|
||
/// <summary>
/// Streams <paramref name="source"/> through unchanged while verifying that the keys of
/// consecutive elements are in the expected order.
/// </summary>
/// <param name="source">The sequence to pass through and verify.</param>
/// <param name="keySelector">Expression extracting the ordering key; compiled once per call.</param>
/// <param name="comparer">
/// Key comparer, passed through TypeSystem.GetComparer (presumably substituting a default
/// when null -- confirm against TypeSystem).
/// </param>
/// <param name="isDescending">When true the comparison result is negated before the check.</param>
/// <returns>The elements of <paramref name="source"/>, in their original order.</returns>
/// <exception cref="DryadLinqException">
/// Thrown (with SR.SourceNotOrdered) when a key is out of order relative to its predecessor.
/// Each element is yielded before its key is checked, so the offending element has already
/// been produced when the exception surfaces on the following MoveNext.
/// </exception>
[Resource(IsStateful = false)]
public static IEnumerable<TSource>
    CheckSort<TSource, TKey>(IEnumerable<TSource> source,
                             Expression<Func<TSource, TKey>> keySelector,
                             IComparer<TKey> comparer,
                             bool isDescending)
{
    Func<TSource, TKey> keySel = keySelector.Compile();
    comparer = TypeSystem.GetComparer<TKey>(comparer);

    // The enumerator is disposed deterministically, including when the consumer
    // abandons the iteration early or the order check throws.
    using (IEnumerator<TSource> elems = source.GetEnumerator())
    {
        if (elems.MoveNext())
        {
            TSource curElem = elems.Current;
            yield return curElem;

            TKey curKey = keySel(curElem);
            while (elems.MoveNext())
            {
                TSource nextElem = elems.Current;
                yield return nextElem;

                TKey nextKey = keySel(nextElem);
                int cmp = comparer.Compare(curKey, nextKey);
                int cmpRes = (isDescending) ? -cmp : cmp;
                if (cmpRes > 0)
                {
                    throw new DryadLinqException(SR.SourceNotOrdered);
                }
                curKey = nextKey;
            }
        }
    }
}
|
||
|
||
/// <summary>
/// Produces the cross product of two sequences, applying <paramref name="procFunc"/> to
/// every (left, right) pair. One side is buffered in a list and re-scanned for each
/// element of the other side, which is streamed.
/// </summary>
/// <param name="s1">Left input.</param>
/// <param name="s2">Right input.</param>
/// <param name="procFunc">Expression combining one left and one right element; compiled once.</param>
/// <returns>
/// One result per pair. When the right side is buffered the left side drives the outer
/// loop; otherwise the right side drives the outer loop.
/// </returns>
public static IEnumerable<T3> Cross<T1, T2, T3>(IEnumerable<T1> s1,
                                                IEnumerable<T2> s2,
                                                Expression<Func<T1, T2, T3>> procFunc)
{
    Func<T1, T2, T3> combine = procFunc.Compile();

    // Default is to buffer the right side. Only when both inputs are vertex readers
    // reporting non-negative total lengths is the choice made by comparing lengths.
    bool bufferRight = !(s1 is HpcVertexReader<T1>) || !(s2 is HpcVertexReader<T2>);
    if (!bufferRight)
    {
        Int64 leftLength = ((HpcVertexReader<T1>)s1).GetTotalLength();
        Int64 rightLength = ((HpcVertexReader<T2>)s2).GetTotalLength();
        bufferRight = leftLength < 0 || rightLength < 0 || rightLength <= leftLength;
    }

    if (!bufferRight)
    {
        // Left side is the smaller one: hold it in memory and stream the right side.
        List<T1> bufferedLeft = s1.ToList();
        foreach (T2 right in s2)
        {
            foreach (T1 left in bufferedLeft)
            {
                yield return combine(left, right);
            }
        }
        yield break;
    }

    // Hold the right side in memory and stream the left side.
    List<T2> bufferedRight = s2.ToList();
    foreach (T1 left in s1)
    {
        foreach (T2 right in bufferedRight)
        {
            yield return combine(left, right);
        }
    }
}
|
||
|
||
/// <summary>
/// Returns the second sequence as-is. The first sequence is accepted but never
/// enumerated or otherwise touched.
/// </summary>
/// <param name="s1">Ignored input.</param>
/// <param name="s2">The sequence to return.</param>
/// <returns>The same reference that was passed as <paramref name="s2"/>.</returns>
public static IEnumerable<T2> SelectSecond<T1, T2>(IEnumerable<T1> s1, IEnumerable<T2> s2)
{
    IEnumerable<T2> second = s2;
    return second;
}
|
||
|
||
/// <summary>
/// Used in SequenceEqual(). Compares the two sequences with
/// System.Linq.Enumerable.SequenceEqual and wraps the boolean outcome via
/// HpcLinqVertex.AsEnumerable.
/// </summary>
/// <param name="s1">First sequence.</param>
/// <param name="s2">Second sequence.</param>
/// <param name="comparer">Element equality comparer handed to Enumerable.SequenceEqual.</param>
/// <returns>The result of HpcLinqVertex.AsEnumerable applied to the comparison outcome.</returns>
public static IEnumerable<bool> SequenceEqual<T>(IEnumerable<T> s1,
                                                 IEnumerable<T> s2,
                                                 IEqualityComparer<T> comparer)
{
    // Fully qualified so the call cannot bind to this method of the same name.
    bool areEqual = System.Linq.Enumerable.SequenceEqual(s1, s2, comparer);
    return HpcLinqVertex.AsEnumerable(areEqual);
}
|
||
|
||
/// <summary>
/// Used in SlidingWindow(). Consumes <paramref name="source"/> completely and yields a
/// single array holding its final (windowSize - 1) elements in their original order.
/// </summary>
/// <param name="source">The sequence to scan; always enumerated to the end.</param>
/// <param name="windowSize">
/// Sliding-window size; the number of retained elements is windowSize - 1. A value of 1
/// yields an empty array.
/// </param>
/// <returns>Exactly one array of length windowSize - 1.</returns>
/// <exception cref="DryadLinqException">
/// Thrown when <paramref name="source"/> contains fewer than windowSize - 1 elements.
/// </exception>
[Resource(IsStateful = false)]
public static IEnumerable<T[]> Last<T>(IEnumerable<T> source,
                                       int windowSize)
{
    int count = windowSize - 1;
    T[] buffer = new T[count];
    long total = 0;
    foreach (var x in source)
    {
        // Circular write: slot (total % count) always holds the oldest retained element.
        // With count == 0 nothing is retained, and the modulo would divide by zero.
        if (count > 0)
        {
            buffer[total % count] = x;
        }
        total++;
    }

    if (total < count)
    {
        throw new DryadLinqException(String.Format(SR.PartitionTooSmallForSlidingWindow, count));
    }

    T[] last = new T[count];
    if (count > 0)
    {
        // Take the modulo in 64-bit arithmetic and narrow afterwards. Casting 'total'
        // to int first would wrap for inputs longer than int.MaxValue elements.
        int startIdx = (int)(total % count);

        // Un-rotate the circular buffer: oldest..end first, then start..newest.
        Array.Copy(buffer, startIdx, last, 0, count - startIdx);
        Array.Copy(buffer, 0, last, count - startIdx, startIdx);
    }
    yield return last;
}
|
||
|
||
/// <summary>
/// Shifts a sequence of arrays one position to the right and tags each with its new
/// index: index 0 receives an empty array, and index i (i >= 1) receives the array that
/// was at position i - 1 in <paramref name="source"/>. The final source array is never
/// emitted. An empty source produces nothing.
/// </summary>
/// <param name="source">The arrays to shift.</param>
/// <returns>One indexed value per source element, carrying the preceding element's array.</returns>
public static IEnumerable<IndexedValue<T[]>> Slide<T>(IEnumerable<T[]> source)
{
    using (IEnumerator<T[]> cursor = source.GetEnumerator())
    {
        if (!cursor.MoveNext())
        {
            yield break;
        }

        // The first position has no predecessor, so it gets an empty array.
        yield return new IndexedValue<T[]>(0, new T[0]);

        T[] previous = cursor.Current;
        for (int position = 1; cursor.MoveNext(); position++)
        {
            yield return new IndexedValue<T[]>(position, previous);
            previous = cursor.Current;
        }
    }
}
|
||
|
||
/// <summary>
/// Applies <paramref name="procFunc"/> to every full sliding window over the elements
/// carried in by <paramref name="source1"/> followed by the elements of
/// <paramref name="source2"/>.
/// </summary>
/// <param name="source1">
/// Must contain exactly one item (read with Single()); its Value array pre-fills the window.
/// </param>
/// <param name="source2">The elements that are slid through the window.</param>
/// <param name="procFunc">
/// Invoked with the window each time it is full. The same Window instance is passed on
/// every call and is mutated between calls.
/// </param>
/// <param name="windowSize">Number of elements in a full window.</param>
/// <returns>
/// One result per full window; nothing if the inputs together never fill a window.
/// </returns>
[Resource(IsStateful = false)]
public static IEnumerable<T2>
    ProcessWindows<T1, T2>(IEnumerable<IndexedValue<T1[]>> source1,
                           IEnumerable<T1> source2,
                           Func<IEnumerable<T1>, T2> procFunc,
                           Int32 windowSize)
{
    Window<T1> current = new Window<T1>(windowSize);

    // Seed the window with the carried-over elements.
    foreach (T1 carried in source1.Single().Value)
    {
        current.Add(carried);
    }

    using (IEnumerator<T1> rest = source2.GetEnumerator())
    {
        // Top the window up until it is full or the input runs dry.
        while (current.Count() < windowSize && rest.MoveNext())
        {
            current.Add(rest.Current);
        }

        if (current.Count() != windowSize)
        {
            yield break;
        }

        yield return procFunc(current);
        while (rest.MoveNext())
        {
            // Adding to a full window drops its oldest element.
            current.Add(rest.Current);
            yield return procFunc(current);
        }
    }
}
|
||
|
||
/// <summary>
/// Calculate the sizes of the partitions. Used for example to implement Concat.
/// Materializes <paramref name="source"/> into an array and yields that same array once
/// per element, paired with the positions 0 .. n-1.
/// </summary>
/// <param name="source">The values to materialize.</param>
/// <returns>
/// n indexed values (n = number of source elements); every one references the same array.
/// </returns>
public static IEnumerable<IndexedValue<T[]>> IndexedCount<T>(IEnumerable<T> source)
{
    T[] snapshot = source.ToArray();
    int position = 0;
    while (position < snapshot.Length)
    {
        yield return new IndexedValue<T[]>(position, snapshot);
        position++;
    }
}
|
||
|
||
/// <summary>
/// Tags every element of <paramref name="source2"/> with a target partition index so that
/// the overall element stream is split into <paramref name="pcount"/> runs of
/// (total / pcount) elements each, with the last partition absorbing any remainder.
/// </summary>
/// <param name="source1">
/// Must contain exactly one item (read with Single()). Its Index is this input's position
/// and its Value is an array whose first Index entries are summed to obtain this input's
/// starting offset (presumably per-partition element counts -- confirm against caller).
/// </param>
/// <param name="source2">The elements to tag.</param>
/// <param name="pcount">Number of target partitions; zero still fails with a division by zero.</param>
/// <returns>
/// Each element paired with a target index in the range 0 .. pcount - 1.
/// </returns>
[Resource(IsStateful = false)]
public static IEnumerable<IndexedValue<T>>
    AddPartitionIndex<T>(IEnumerable<IndexedValue<long[]>> source1, IEnumerable<T> source2, Int32 pcount)
{
    IndexedValue<long[]> s1 = source1.Single();

    // With fewer elements than partitions the integer average is 0, which would make
    // the divisions below throw. Clamp to 1 so the elements go one per partition.
    long averageCount = Math.Max(1L, s1.Value.Sum() / pcount);

    // Global offset of the first element of this input.
    long partialCount = 0;
    for (int i = 0; i < s1.Index; i++)
    {
        partialCount += s1.Value[i];
    }

    // An offset that falls into the remainder (beyond pcount * averageCount) belongs to
    // the last partition; without the clamp the index would run past pcount - 1.
    int partIndex = (int)Math.Min(partialCount / averageCount, (long)(pcount - 1));
    long indexInPart = partialCount % averageCount;
    foreach (T elem in source2)
    {
        // Move to the next partition once the current one is full, except for the last
        // partition, which takes everything that is left.
        if (indexInPart >= averageCount && partIndex != pcount - 1)
        {
            partIndex++;
            indexInPart = 0;
        }
        yield return new IndexedValue<T>(partIndex, elem);
        indexInPart++;
    }
}
|
||
|
||
/// <summary>
/// Produces one dummy item per partition. Used for example to implement Reverse().
/// The input is never enumerated; the output is always the single value 0.
/// </summary>
/// <param name="source">Ignored.</param>
/// <returns>A lazily produced sequence containing exactly one element, 0.</returns>
[Resource(IsStateful = false)]
public static IEnumerable<int> ValueZero<T>(IEnumerable<T> source)
{
    const int dummy = 0;
    yield return dummy;
}
|
||
|
||
/// <summary>
/// Used for Reverse().
/// Input: a sequence of n dummy items, e.g. {0, 0, 0, ...} x n.
/// Output: {(0,n), (1,n), (2,n), ..., (n-1,n)}, where item.Index is the position and
/// item.Value is n (the number of partitions).
/// </summary>
/// <param name="source">The dummy items; only their number matters.</param>
/// <returns>n pairs of (position, n).</returns>
public static IEnumerable<IndexedValue<int>> MakeIndexCountPairs(IEnumerable<int> source)
{
    int total = source.Count();
    int position = 0;
    while (position < total)
    {
        yield return new IndexedValue<int>(position, total);
        position++;
    }
}
|
||
|
||
/// <summary>
/// Used for Reverse().
/// Receives a pair (myIndex, nPartitions) as <paramref name="source1"/> and a normal
/// sequence as <paramref name="source2"/>, and tags every element with
/// targetIdx = nPartitions - myIndex - 1, producing {(targetIdx, item), (targetIdx, item), ...}.
/// </summary>
/// <param name="source1">Must contain exactly one item (read with Single()).</param>
/// <param name="source2">The elements to tag.</param>
/// <returns>Every element of <paramref name="source2"/> paired with the mirrored index.</returns>
public static IEnumerable<IndexedValue<T>>
    AddIndexForReverse<T>(IEnumerable<IndexedValue<int>> source1, IEnumerable<T> source2)
{
    IndexedValue<int> position = source1.Single();
    int ownIndex = position.Index;
    int partitionCount = position.Value;

    // Mirror the index so that the first partition becomes the last, and so on.
    int mirrored = partitionCount - ownIndex - 1;

    foreach (T payload in source2)
    {
        yield return new IndexedValue<T>(mirrored, payload);
    }
}
|
||
|
||
/// <summary>
/// Used in Zip(). Materializes both inputs into arrays, bundles them into one Pair
/// (first input as the pair's first component, second input as its second), and yields
/// that same pair once per element of the second input, indexed 0 .. n-1.
/// </summary>
/// <param name="source1">Values for the first array of the pair.</param>
/// <param name="source2">Values for the second array; its length sets the output count.</param>
/// <returns>
/// One indexed value per element of <paramref name="source2"/>, all sharing one Pair instance.
/// </returns>
public static IEnumerable<IndexedValue<Pair<long[], long[]>>>
    ZipCount(IEnumerable<long> source1, IEnumerable<long> source2)
{
    long[] firstCounts = source1.ToArray();
    long[] secondCounts = source2.ToArray();
    Pair<long[], long[]> shared = new Pair<long[], long[]>(firstCounts, secondCounts);

    int position = 0;
    while (position < secondCounts.Length)
    {
        yield return new IndexedValue<Pair<long[], long[]>>(position, shared);
        position++;
    }
}
|
||
|
||
/// <summary>
/// Tags every element of <paramref name="source2"/> with the index of the slot it falls
/// into when its global position is laid over the sizes in the pair's Key array.
/// </summary>
/// <param name="source1">
/// Must contain exactly one item (read with Single()). Its Index is this input's
/// position; Value.Value holds the sizes summed (for positions below Index) to get this
/// input's global start offset, and Value.Key holds the slot sizes the offsets are
/// mapped onto.
/// </param>
/// <param name="source2">The elements to tag.</param>
/// <returns>
/// Elements paired with their slot index. Elements positioned beyond the combined size
/// of all Key slots are dropped; zero-sized Key slots are skipped.
/// </returns>
public static IEnumerable<IndexedValue<T>>
    AssignPartitionIndex<T>(IEnumerable<IndexedValue<Pair<long[], long[]>>> source1,
                            IEnumerable<T> source2)
{
    IndexedValue<Pair<long[], long[]>> info = source1.Single();
    long[] slotSizes = info.Value.Key;
    long[] inputSizes = info.Value.Value;

    // Global offset of this input's first element.
    long balance = 0;
    for (int i = 0; i < info.Index; i++)
    {
        balance += inputSizes[i];
    }

    // Walk the slots until the offset falls inside one. Afterwards a negative balance
    // is minus the number of free places remaining in slot 'slot'.
    int slot = 0;
    while (slot < slotSizes.Length)
    {
        balance -= slotSizes[slot];
        if (balance < 0) break;
        slot++;
    }

    if (balance >= 0)
    {
        // The start offset lies beyond every slot: nothing to emit.
        yield break;
    }

    foreach (T elem in source2)
    {
        yield return new IndexedValue<T>(slot, elem);
        balance++;
        if (balance != 0)
        {
            continue;
        }

        // Current slot is full: advance to the next slot with a non-zero size.
        slot++;
        while (slot < slotSizes.Length)
        {
            balance = -slotSizes[slot];
            if (balance < 0) break;
            slot++;
        }

        if (balance == 0)
        {
            // No slot left; the remaining elements are not emitted.
            yield break;
        }
    }
}
|
||
|
||
/// <summary>
/// Used in SelectWithPartitionIndex(). Replaces every element of
/// <paramref name="source"/> with its zero-based position; the element values themselves
/// are ignored.
/// </summary>
/// <param name="source">The sequence whose positions are enumerated.</param>
/// <returns>0, 1, 2, ... with one value per source element.</returns>
public static IEnumerable<int> AssignIndex(IEnumerable<int> source)
{
    int position = 0;
    foreach (int ignored in source)
    {
        yield return position++;
    }
}
|
||
|
||
/// <summary>
/// Invokes <paramref name="procFunc"/> once on the whole of <paramref name="source1"/>,
/// passing the single integer held by <paramref name="source2"/> as the index argument.
/// </summary>
/// <param name="source1">The sequence handed to <paramref name="procFunc"/>.</param>
/// <param name="source2">Must contain exactly one integer (read eagerly with Single()).</param>
/// <param name="procFunc">Function receiving the sequence and the index.</param>
/// <returns>Whatever <paramref name="procFunc"/> returns.</returns>
public static IEnumerable<T2>
    ProcessWithIndex<T1, T2>(IEnumerable<T1> source1,
                             IEnumerable<int> source2,
                             Func<IEnumerable<T1>, int, IEnumerable<T2>> procFunc)
{
    return procFunc(source1, source2.Single());
}
|
||
|
||
/// <summary>
/// Projects every element of <paramref name="source1"/> through
/// <paramref name="procFunc"/>, supplying the single integer held by
/// <paramref name="source2"/> as the index argument for every element.
/// </summary>
/// <param name="source1">The elements to project.</param>
/// <param name="source2">Must contain exactly one integer (read eagerly with Single()).</param>
/// <param name="procFunc">Function receiving an element and the index.</param>
/// <returns>
/// The result of HpcLinqVertex.Select over <paramref name="source1"/>. The trailing
/// 'true' argument is forwarded unchanged; its meaning is defined by HpcLinqVertex.Select.
/// </returns>
public static IEnumerable<T2>
    ProcessWithIndex<T1, T2>(IEnumerable<T1> source1,
                             IEnumerable<int> source2,
                             Func<T1, int, T2> procFunc)
{
    int partitionIndex = source2.Single();
    return HpcLinqVertex.Select(source1, item => procFunc(item, partitionIndex), true);
}
|
||
}
|
||
|
||
/// <summary>
/// Fixed-capacity circular buffer. Once the buffer is full, each added element
/// overwrites the oldest one. Enumeration yields the held elements from oldest to newest.
/// </summary>
internal class Window<T> : IEnumerable<T>
{
    // Backing storage; its length is the capacity of the window.
    private T[] m_elems;
    // Position in m_elems of the oldest element.
    private int m_startIdx;
    // Number of elements currently held (never more than the capacity).
    private int m_count;

    /// <summary>Creates an empty window able to hold <paramref name="len"/> elements.</summary>
    public Window(int len)
    {
        this.m_startIdx = 0;
        this.m_count = 0;
        this.m_elems = new T[len];
    }

    /// <summary>
    /// Appends <paramref name="elem"/> as the newest element, evicting the oldest one if
    /// the window is already full.
    /// </summary>
    public void Add(T elem)
    {
        int capacity = this.m_elems.Length;

        // Slot just past the newest element, wrapped around the end of the array.
        int slot = this.m_startIdx + this.m_count;
        if (slot >= capacity)
        {
            slot -= capacity;
        }
        this.m_elems[slot] = elem;

        if (this.m_count >= capacity)
        {
            // Full: the write above replaced the oldest element, so the start moves on.
            int advanced = this.m_startIdx + 1;
            this.m_startIdx = (advanced == capacity) ? 0 : advanced;
        }
        else
        {
            this.m_count++;
        }
    }

    /// <summary>Returns the number of elements currently held.</summary>
    public int Count()
    {
        return this.m_count;
    }

    #region IEnumerable and IEnumerable<T> members
    IEnumerator IEnumerable.GetEnumerator()
    {
        return this.GetEnumerator();
    }

    /// <summary>Enumerates the held elements from oldest to newest.</summary>
    public IEnumerator<T> GetEnumerator()
    {
        // The start position is captured once; the count is re-read on every step.
        int first = this.m_startIdx;
        for (int offset = 0; offset < this.m_count; offset++)
        {
            yield return this.m_elems[(first + offset) % this.m_elems.Length];
        }
    }
    #endregion
}
|
||
}
|
||
|