/*
Copyright (c) Microsoft Corporation

All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0

THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE,
FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.

See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.
*/

using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Globalization;
using System.Reflection;
using System.Linq.Expressions;
using System.Linq;

#pragma warning disable 1591

namespace Microsoft.Research.DryadLinq.Internal
{
    /// <summary>
    /// DryadLINQ helper functions. They are mainly used in the implementation of DryadLINQ.
    /// </summary>
    /// <remarks>A DryadLINQ user should not need to use DryadLinqHelper directly.</remarks>
    public static class DryadLinqHelper
    {
        /// <summary>
        /// Check if an input source is sorted.
        /// </summary>
        /// <typeparam name="TSource">The type of input record.</typeparam>
        /// <typeparam name="TKey">The type of key.</typeparam>
        /// <param name="source">The input source.</param>
        /// <param name="keySelector">The key selection function.</param>
        /// <param name="comparer">A comparer object used to compare keys.</param>
        /// <param name="isDescending">True if the check is for descending</param>
        /// <returns>
        /// The records of <paramref name="source"/>, unchanged and in their original order.
        /// </returns>
        /// <remarks>
        /// Each record is yielded before its key is compared with the previous key, so the
        /// first out-of-order record is still emitted before the exception is thrown.
        /// </remarks>
        [Resource(IsStateful = false)]
        public static IEnumerable<TSource>
            CheckSort<TSource, TKey>(IEnumerable<TSource> source,
                                     Expression<Func<TSource, TKey>> keySelector,
                                     IComparer<TKey> comparer,
                                     bool isDescending)
        {
            Func<TSource, TKey> keySel = keySelector.Compile();
            comparer = TypeSystem.GetComparer<TKey>(comparer);

            // Dispose the enumerator even when the consumer stops early or the check throws.
            using (IEnumerator<TSource> elems = source.GetEnumerator())
            {
                if (!elems.MoveNext())
                {
                    yield break;
                }

                TSource curElem = elems.Current;
                yield return curElem;
                TKey curKey = keySel(curElem);

                while (elems.MoveNext())
                {
                    TSource nextElem = elems.Current;
                    yield return nextElem;
                    TKey nextKey = keySel(nextElem);

                    // Test the sign directly rather than negating the result: -int.MinValue
                    // overflows back to int.MinValue and would hide a descending violation.
                    int cmp = comparer.Compare(curKey, nextKey);
                    bool outOfOrder = isDescending ? (cmp < 0) : (cmp > 0);
                    if (outOfOrder)
                    {
                        throw new DryadLinqException(SR.SourceNotOrdered);
                    }
                    curKey = nextKey;
                }
            }
        }

        /// <summary>
        /// Apply a function to the cross product of two input sequences.
        /// </summary>
        /// <typeparam name="T1">The record type of the first input.</typeparam>
        /// <typeparam name="T2">The record type of the second input.</typeparam>
        /// <typeparam name="T3">The record type of the result.</typeparam>
        /// <param name="s1">The first input.</param>
        /// <param name="s2">The second input.</param>
        /// <param name="procFunc">The function to apply.</param>
        /// <returns>The result of applying the function to the cross product of two inputs.</returns>
        /// <remarks>
        /// One input is buffered in a list and the other is streamed. The second input is
        /// buffered by default; when both inputs are vertex readers that report a
        /// non-negative total length, the first input is buffered instead if its length is
        /// strictly smaller. The iteration order of the result depends on which side is buffered.
        /// </remarks>
        public static IEnumerable<T3>
            Cross<T1, T2, T3>(IEnumerable<T1> s1,
                              IEnumerable<T2> s2,
                              Expression<Func<T1, T2, T3>> procFunc)
        {
            Func<T1, T2, T3> proc = procFunc.Compile();

            bool useRight = true;
            DryadLinqVertexReader<T1> leftReader = s1 as DryadLinqVertexReader<T1>;
            DryadLinqVertexReader<T2> rightReader = s2 as DryadLinqVertexReader<T2>;
            if (leftReader != null && rightReader != null)
            {
                Int64 leftLen = leftReader.GetTotalLength();
                Int64 rightLen = rightReader.GetTotalLength();
                if (leftLen >= 0 && rightLen >= 0)
                {
                    useRight = rightLen <= leftLen;
                }
            }

            if (useRight)
            {
                List<T2> elems2 = s2.ToList();
                foreach (var elem1 in s1)
                {
                    foreach (var elem2 in elems2)
                    {
                        yield return proc(elem1, elem2);
                    }
                }
            }
            else
            {
                List<T1> elems1 = s1.ToList();
                foreach (var elem2 in s2)
                {
                    foreach (var elem1 in elems1)
                    {
                        yield return proc(elem1, elem2);
                    }
                }
            }
        }

        /// <summary>
        /// Determines whether two sequences are equal according to an equality comparer
        /// </summary>
        /// <typeparam name="T">The record type of the sequences.</typeparam>
        /// <param name="s1">The first sequence.</param>
        /// <param name="s2">The second sequence.</param>
        /// <param name="comparer">An equality comparer.</param>
        /// <returns>true iff the two sequences are equal.</returns>
        public static IEnumerable<bool>
            SequenceEqual<T>(IEnumerable<T> s1, IEnumerable<T> s2, IEqualityComparer<T> comparer)
        {
            bool areEqual = System.Linq.Enumerable.SequenceEqual(s1, s2, comparer);
            return DryadLinqVertex.AsEnumerable(areEqual);
        }

        // Used in SlidingWindow()
        /// <summary>
        /// Returns the last windowSize-1 records in a sequence.
        /// </summary>
        /// <typeparam name="T">The record type of the sequence.</typeparam>
        /// <param name="source">The input sequence.</param>
        /// <param name="windowSize">The window size</param>
        /// <returns>The last windowSize-1 records as an array.</returns>
        /// <remarks>Used in sliding windows computations.</remarks>
        [Resource(IsStateful = false)]
        public static IEnumerable<T[]> Last<T>(IEnumerable<T> source, int windowSize)
        {
            int count = windowSize - 1;
            if (count == 0)
            {
                // A window of size one carries nothing over from the previous partition.
                // Without this guard the modulo operations below divide by zero.
                yield return new T[0];
                yield break;
            }

            // Ring buffer holding the most recent 'count' records.
            T[] buffer = new T[count];
            long total = 0;
            foreach (var x in source)
            {
                buffer[total % count] = x;
                total++;
            }
            if (total < count)
            {
                throw new DryadLinqException(String.Format(SR.PartitionTooSmallForSlidingWindow, count));
            }

            // Reduce modulo count BEFORE narrowing to int; casting first truncates 'total'
            // once it exceeds int.MaxValue and yields a wrong (possibly negative) index.
            int startIdx = (int)(total % count);

            // Unroll the ring buffer so that the oldest retained record comes first.
            T[] last = new T[count];
            Array.Copy(buffer, startIdx, last, 0, count - startIdx);
            Array.Copy(buffer, 0, last, count - startIdx, startIdx);
            yield return last;
        }

        /// <summary>
        /// Shifts a sequence of arrays one position to the right and tags each with its index.
        /// </summary>
        /// <typeparam name="T">The element type of the arrays.</typeparam>
        /// <param name="source">The input sequence of arrays.</param>
        /// <returns>
        /// Nothing for an empty input. Otherwise (0, empty array) followed by
        /// (i, the (i-1)-th input array) for each further input; the final input array is
        /// never emitted.
        /// </returns>
        public static IEnumerable<IndexedValue<T[]>> Slide<T>(IEnumerable<T[]> source)
        {
            using (IEnumerator<T[]> sourceEnum = source.GetEnumerator())
            {
                if (sourceEnum.MoveNext())
                {
                    yield return new IndexedValue<T[]>(0, new T[0]);

                    int index = 1;
                    T[] lastVal = sourceEnum.Current;
                    while (sourceEnum.MoveNext())
                    {
                        yield return new IndexedValue<T[]>(index, lastVal);
                        index++;
                        lastVal = sourceEnum.Current;
                    }
                }
            }
        }

        /// <summary>
        /// Applies a function to every full sliding window over a sequence.
        /// </summary>
        /// <typeparam name="T1">The record type of the input.</typeparam>
        /// <typeparam name="T2">The record type of the result.</typeparam>
        /// <param name="source1">
        /// A single-element sequence whose value is the array of records used to pre-fill the window.
        /// </param>
        /// <param name="source2">The records to slide the window over.</param>
        /// <param name="procFunc">The function applied to each full window.</param>
        /// <param name="windowSize">The number of records in a window.</param>
        /// <returns>
        /// One result per full window; nothing if the window never reaches
        /// <paramref name="windowSize"/> records.
        /// </returns>
        /// <remarks>
        /// The same window object is passed to <paramref name="procFunc"/> on every call and
        /// is mutated between calls.
        /// </remarks>
        [Resource(IsStateful = false)]
        public static IEnumerable<T2>
            ProcessWindows<T1, T2>(IEnumerable<IndexedValue<T1[]>> source1,
                                   IEnumerable<T1> source2,
                                   Func<IEnumerable<T1>, T2> procFunc,
                                   Int32 windowSize)
        {
            Window<T1> window = new Window<T1>(windowSize);

            // Seed the window with the records carried over in source1.
            T1[] slided = source1.Single().Value;
            for (int i = 0; i < slided.Length; i++)
            {
                window.Add(slided[i]);
            }

            using (IEnumerator<T1> sourceEnum = source2.GetEnumerator())
            {
                // Top the window up until it is full or the input runs out.
                while (window.Count() < windowSize)
                {
                    if (!sourceEnum.MoveNext()) break;
                    window.Add(sourceEnum.Current);
                }

                if (window.Count() == windowSize)
                {
                    yield return procFunc(window);
                    while (sourceEnum.MoveNext())
                    {
                        window.Add(sourceEnum.Current);
                        yield return procFunc(window);
                    }
                }
            }
        }

        // Calculate the sizes of the partitions. Used for example to implement Concat.
        // Emits one record per input element; record i carries index i and the array of
        // all input elements.
        public static IEnumerable<IndexedValue<T[]>> IndexedCount<T>(IEnumerable<T> source)
        {
            T[] elems = source.ToArray();
            for (int i = 0; i < elems.Length; i++)
            {
                yield return new IndexedValue<T[]>(i, elems);
            }
        }

        /// <summary>
        /// Tags each record of a sequence with the index of the output partition it belongs to.
        /// </summary>
        /// <typeparam name="T">The record type.</typeparam>
        /// <param name="source1">
        /// A single-element sequence holding an index and an array of counts; the counts at
        /// positions below that index are summed to obtain the offset of the first record of
        /// <paramref name="source2"/>.
        /// </param>
        /// <param name="source2">The records to tag.</param>
        /// <param name="pcount">The number of output partitions.</param>
        /// <returns>The records of <paramref name="source2"/>, each paired with a partition index.</returns>
        [Resource(IsStateful = false)]
        public static IEnumerable<IndexedValue<T>>
            AddPartitionIndex<T>(IEnumerable<IndexedValue<long[]>> source1,
                                 IEnumerable<T> source2,
                                 Int32 pcount)
        {
            IndexedValue<long[]> s1 = source1.Single();

            // Keep the target size at least 1: with fewer records than partitions the
            // integer division gives 0 and the division/modulo below would throw.
            long averageCount = Math.Max(1L, s1.Value.Sum() / pcount);

            long partialCount = 0;
            for (int i = 0; i < s1.Index; i++)
            {
                partialCount += s1.Value[i];
            }

            // The last partition absorbs the remainder of the division, so an offset at or
            // beyond averageCount * pcount must still map to partition pcount - 1.
            int partIndex = (int)Math.Min(partialCount / averageCount, (long)(pcount - 1));
            long indexInPart = partialCount % averageCount;
            foreach (T elem in source2)
            {
                if (indexInPart >= averageCount && partIndex != pcount - 1)
                {
                    partIndex++;
                    indexInPart = 0;
                }
                yield return new IndexedValue<T>(partIndex, elem);
                indexInPart++;
            }
        }

        // Produces one dummy item per partition. Used for example to implement Reverse().
        [Resource(IsStateful = false)]
        public static IEnumerable<int> ValueZero<T>(IEnumerable<T> source)
        {
            yield return 0;
        }

        // Used for Reverse()
        // input: a sequence of n dummy items. eg {0,0,0... } x n
        // output: { {(0,n), (1,n), (2,n), .., (n-1, n)} }
        //    item.Index = index
        //    item.Value = nPartitions
        public static IEnumerable<IndexedValue<int>> MakeIndexCountPairs(IEnumerable<int> source)
        {
            int count = source.Count();
            for (int i = 0; i < count; i++)
            {
                yield return new IndexedValue<int>(i, count);
            }
        }

        // Used for Reverse()
        // receives a pair (myIndex, nPartitions) as source1, and a normal sequence as source2.
        // targetIdx = nPartition-myIndex-1
        // produces {(targetIdx, item), (targetIdx, item), ...}
        public static IEnumerable<IndexedValue<T>>
            AddIndexForReverse<T>(IEnumerable<IndexedValue<int>> source1, IEnumerable<T> source2)
        {
            IndexedValue<int> item = source1.Single();
            int myIndex = item.Index;
            int pcount = item.Value;
            int targetIndex = pcount - myIndex - 1;
            foreach (T elem in source2)
            {
                yield return new IndexedValue<T>(targetIndex, elem);
            }
        }

        // Used in Zip()
        // Emits one record per element of source2; record i carries index i and a pair
        // holding both inputs as arrays (source1 as Key, source2 as Value).
        public static IEnumerable<IndexedValue<Pair<long[], long[]>>>
            ZipCount(IEnumerable<long> source1, IEnumerable<long> source2)
        {
            long[] elems1 = source1.ToArray();
            long[] elems2 = source2.ToArray();
            Pair<long[], long[]> pair = new Pair<long[], long[]>(elems1, elems2);
            for (int i = 0; i < elems2.Length; i++)
            {
                yield return new IndexedValue<Pair<long[], long[]>>(i, pair);
            }
        }

        // Used in Zip()
        // Tags each record of source2 with the index of the elems1 entry it lines up with.
        // NOTE(review): elems1/elems2 are presumably the per-partition record counts of the
        // two zipped inputs as produced by ZipCount -- confirm against the caller.
        public static IEnumerable<IndexedValue<T>>
            AssignPartitionIndex<T>(IEnumerable<IndexedValue<Pair<long[], long[]>>> source1,
                                    IEnumerable<T> source2)
        {
            IndexedValue<Pair<long[], long[]>> s1 = source1.Single();
            long[] elems1 = s1.Value.Key;
            long[] elems2 = s1.Value.Value;

            // Offset of the first record of source2: sum of elems2 below our own index.
            long partialCount = 0;
            for (int i = 0; i < s1.Index; i++)
            {
                partialCount += elems2[i];
            }

            // Walk elems1 until the offset falls inside an entry. From here on a negative
            // partialCount is minus the number of slots left in entry partIndex.
            int partIndex = 0;
            for (partIndex = 0; partIndex < elems1.Length; partIndex++)
            {
                partialCount -= elems1[partIndex];
                if (partialCount < 0) break;
            }

            // If the offset lies beyond the total of elems1, nothing is emitted.
            if (partialCount < 0)
            {
                foreach (T elem in source2)
                {
                    yield return new IndexedValue<T>(partIndex, elem);
                    partialCount++;
                    if (partialCount == 0)
                    {
                        // Current entry is full: advance to the next non-empty entry.
                        for (partIndex = partIndex + 1; partIndex < elems1.Length; partIndex++)
                        {
                            partialCount = -elems1[partIndex];
                            if (partialCount < 0) break;
                        }
                        // No entry with room left: the remaining records are dropped.
                        if (partialCount == 0) break;
                    }
                }
            }
        }

        // Used in SelectWithPartitionIndex()
        // Replaces each input item with its zero-based position in the sequence.
        public static IEnumerable<int> AssignIndex(IEnumerable<int> source)
        {
            int index = 0;
            foreach (int elem in source)
            {
                yield return index;
                index++;
            }
        }

        /// <summary>
        /// Applies a function to a sequence together with an index read from a second sequence.
        /// </summary>
        /// <typeparam name="T1">The record type of the input.</typeparam>
        /// <typeparam name="T2">The record type of the result.</typeparam>
        /// <param name="source1">The input sequence.</param>
        /// <param name="source2">A single-element sequence holding the index.</param>
        /// <param name="procFunc">The function to apply.</param>
        /// <returns>The result of <paramref name="procFunc"/>.</returns>
        public static IEnumerable<T2>
            ApplyWithPartitionIndex<T1, T2>(IEnumerable<T1> source1,
                                            IEnumerable<int> source2,
                                            Func<IEnumerable<T1>, int, IEnumerable<T2>> procFunc)
        {
            int index = source2.Single();
            return procFunc(source1, index);
        }

        /// <summary>
        /// Honors the DRYADLINQ_DEBUGVERTEX environment variable.
        /// </summary>
        /// <remarks>
        /// Does nothing when the variable is unset. The value "LAUNCH" (case-insensitive)
        /// launches a debugger; any other value polls once per second until a debugger is
        /// attached and then breaks into it.
        /// </remarks>
        public static void CheckVertexDebugRequest()
        {
            string debugEnvVar = Environment.GetEnvironmentVariable("DRYADLINQ_DEBUGVERTEX");
            if (debugEnvVar == null)
            {
                return;
            }

            if (String.Equals(debugEnvVar, "LAUNCH", StringComparison.OrdinalIgnoreCase))
            {
                System.Diagnostics.Debugger.Launch();
            }
            else
            {
                DryadLinqLog.AddInfo("Waiting for debugger to attach...");
                while (!System.Diagnostics.Debugger.IsAttached)
                {
                    System.Threading.Thread.Sleep(1000);
                }
                System.Diagnostics.Debugger.Break();
            }
        }
    }

    /// <summary>
    /// A fixed-capacity circular buffer that enumerates its elements from oldest to newest.
    /// </summary>
    /// <typeparam name="T">The element type.</typeparam>
    internal class Window<T> : IEnumerable<T>
    {
        private T[] m_elems;        // backing storage; its length is the window capacity
        private int m_startIdx;     // position of the oldest element
        private int m_count;        // number of elements currently held

        public Window(int len)
        {
            this.m_elems = new T[len];
            this.m_startIdx = 0;
            this.m_count = 0;
        }

        /// <summary>
        /// Appends an element; once the window is full the oldest element is overwritten.
        /// </summary>
        public void Add(T elem)
        {
            // Slot just past the newest element, wrapped around the end of the array.
            int nextIdx = this.m_startIdx + this.m_count;
            if (nextIdx >= this.m_elems.Length)
            {
                nextIdx -= this.m_elems.Length;
            }
            this.m_elems[nextIdx] = elem;

            if (this.m_count < this.m_elems.Length)
            {
                this.m_count++;
            }
            else
            {
                // Full: the slot just written held the oldest element, so advance the start.
                this.m_startIdx++;
                if (this.m_startIdx == this.m_elems.Length)
                {
                    this.m_startIdx = 0;
                }
            }
        }

        /// <summary>
        /// Returns the number of elements currently in the window.
        /// </summary>
        public int Count()
        {
            return this.m_count;
        }

        #region IEnumerable and IEnumerable<T> members
        IEnumerator IEnumerable.GetEnumerator()
        {
            return this.GetEnumerator();
        }

        public IEnumerator<T> GetEnumerator()
        {
            int idx = this.m_startIdx;
            for (int i = 0; i < this.m_count; i++)
            {
                yield return this.m_elems[idx];
                idx++;
                if (idx == this.m_elems.Length) idx = 0;
            }
        }
        #endregion
    }
}