/*
Copyright (c) Microsoft Corporation

All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0

THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE,
FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.

See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.

*/

//
// (c) Microsoft Corporation. All rights reserved.
//
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Formatters.Binary;
using System.Linq;
using System.Linq.Expressions;
using System.Reflection;
using System.Diagnostics;
using Microsoft.Research.DryadLinq.Internal;

namespace Microsoft.Research.DryadLinq
{
    // The information we know about the dataset at each stage of the
    // computation. For each operator, we try to compute this from the
    // DataSetInfo of its input datasets and the semantics of the operator.
    //
    // A DataSetInfo bundles three independent facts about a dataset: how it
    // is partitioned, how it is ordered, and whether it is known to be distinct.
    [Serializable]
    internal class DataSetInfo
    {
        // Shared defaults used by the parameterless constructor.
        // NOTE(review): OnePartition is a single shared instance whose Count
        // is settable; mutating it is visible to every DataSetInfo that
        // still references it.
        internal static PartitionInfo OnePartition = new RandomPartition(1);
        internal static OrderByInfo NoOrderBy = new OrderByInfo();
        internal static DistinctInfo NoDistinct = new DistinctInfo();

        internal PartitionInfo partitionInfo;
        internal OrderByInfo orderByInfo;
        internal DistinctInfo distinctInfo;

        // Creates the "nothing known" info: one random partition, no
        // ordering, no distinctness.
        internal DataSetInfo()
        {
            this.partitionInfo = OnePartition;
            this.orderByInfo = NoOrderBy;
            this.distinctInfo = NoDistinct;
        }

        // Creates an info from the three explicit components.
        internal DataSetInfo(PartitionInfo pinfo, OrderByInfo oinfo, DistinctInfo dinfo)
        {
            this.partitionInfo = pinfo;
            this.orderByInfo = oinfo;
            this.distinctInfo = dinfo;
        }

        // Shallow copy: the three component objects are shared with 'info'.
        internal DataSetInfo(DataSetInfo info)
        {
            this.partitionInfo = info.partitionInfo;
            this.orderByInfo = info.orderByInfo;
            this.distinctInfo = info.distinctInfo;
        }

        // Return true iff the entire dataset is ordered.
        // That requires a range partitioning on (keySel, comparer), an
        // ordering on the same (keySel, comparer), and the ordering and the
        // partitioning agreeing on direction.
        internal bool IsOrderedBy(LambdaExpression keySel, object comparer)
        {
            return (this.partitionInfo.ParType == PartitionType.Range &&
                    this.partitionInfo.IsPartitionedBy(keySel, comparer) &&
                    this.orderByInfo.IsOrderedBy(keySel, comparer) &&
                    this.orderByInfo.IsSameMonotoncity(this.partitionInfo));
        }

        // Deserializes a DataSetInfo from 'fstream'. The stream is not
        // closed here.
        // SECURITY NOTE(review): BinaryFormatter must only be given trusted
        // data; deserializing untrusted input with it is unsafe.
        internal static DataSetInfo Read(Stream fstream)
        {
            BinaryFormatter bfm = new BinaryFormatter();
            return (DataSetInfo)bfm.Deserialize(fstream);
        }

        // Serializes 'dsInfo' to 'fstream' with BinaryFormatter. The stream
        // is not closed here.
        internal static void Write(DataSetInfo dsInfo, Stream fstream)
        {
            BinaryFormatter bfm = new BinaryFormatter();
            bfm.Serialize(fstream, dsInfo);
        }
    }

    // The kind of partitioning a dataset has. HashOrRange is numerically the
    // bitwise OR of Hash and Range.
    internal enum PartitionType
    {
        Random = 0x0000,
        Hash = 0x0001,
        Range = 0x0002,
        HashOrRange = 0x0003
    }

    // Base class describing how a dataset is partitioned. Concrete kinds are
    // RandomPartition, RangePartition<TKey> and HashPartition<TKey>.
    internal abstract class PartitionInfo
    {
        private PartitionType m_partitionType;

        protected PartitionInfo(PartitionType parType)
        {
            this.m_partitionType = parType;
        }

        internal PartitionType ParType
        {
            get { return this.m_partitionType; }
        }

        // Only meaningful for partition kinds that override it; the base
        // implementation throws InvalidOperationException.
        internal virtual bool IsDescending
        {
            get { throw new InvalidOperationException(); }
        }

        // Only meaningful for partition kinds that override it; the base
        // implementation throws InvalidOperationException.
        internal virtual bool HasKeys
        {
            get { throw new InvalidOperationException(); }
        }

        // Concatenating two partitioned datasets yields a random partitioning
        // whose partition count is the sum of the two counts.
        internal virtual PartitionInfo Concat(PartitionInfo p)
        {
            return new RandomPartition(this.Count + p.Count);
        }

        // Number of partitions.
        internal abstract int Count { get; set; }

        // True iff the data is partitioned by the given key selector
        // (optionally also matching the comparer and the direction).
        internal abstract bool IsPartitionedBy(LambdaExpression keySel);
        internal abstract bool IsPartitionedBy(LambdaExpression keySel, object comparer);
        internal abstract bool IsPartitionedBy(LambdaExpression keySel, object comparer, bool isDescending);

        // True iff 'p' describes the same partitioning as this instance.
        internal abstract bool IsSamePartition(PartitionInfo p);

        // Creates a query node that partitions 'child' the way this
        // instance describes, using 'keySelector'.
        internal abstract DryadQueryNode CreatePartitionNode(LambdaExpression keySelector, DryadQueryNode child);

        // Creates a partition info of the same kind with a different key selector.
        internal abstract PartitionInfo Create(LambdaExpression keySel);

        // Re-expresses this partition info through a result selector.
        internal abstract PartitionInfo Rewrite(LambdaExpression keySel, ParameterExpression param);

        // Builds a HashPartition<keyType> via reflection, because the key
        // type is only available as a run-time Type. The target constructor
        // is non-public, hence the binding flags.
        internal static PartitionInfo CreateHash(LambdaExpression keySel,
                                                 int count,
                                                 object comparer,
                                                 Type keyType)
        {
            Type hashType = typeof(HashPartition<>).MakeGenericType(keyType);
            object[] args = new object[] { keySel, count, comparer };
            return (PartitionInfo)Activator.CreateInstance(
                hashType, BindingFlags.NonPublic | BindingFlags.Instance, null, args, null);
        }

        // Builds a RangePartition<keyType> via reflection. 'keys' may be
        // null, in which case 'parCnt' gives the partition count;
        // 'isDescending' may be null, in which case the constructor derives
        // the direction from the keys.
        internal static PartitionInfo CreateRange(LambdaExpression keySel,
                                                  object keys,
                                                  object comparer,
                                                  bool? isDescending,
                                                  Int32 parCnt,
                                                  Type keyType)
        {
            Type parType = typeof(RangePartition<>).MakeGenericType(keyType);
            object[] args = new object[] { keySel, keys, comparer, isDescending, parCnt };
            try
            {
                return (PartitionInfo)Activator.CreateInstance(
                    parType, BindingFlags.NonPublic | BindingFlags.Instance, null, args, null);
            }
            catch (TargetInvocationException tie)
            {
                // The ctor for RangePartition<> can throw.. we trap and rethrow the useful exception here.
                // NOTE(review): rethrowing the inner exception object resets its stack trace.
                if (tie.InnerException != null)
                {
                    throw tie.InnerException;
                }
                else
                {
                    throw;
                }
            }
        }

        // Returns the partitioning operator (method plus argument
        // expressions) that reproduces this partitioning. The base
        // implementation throws InvalidOperationException.
        internal virtual Pair<MethodInfo, Expression[]> GetOperator()
        {
            throw new InvalidOperationException();
        }
    }

    // A partitioning about which nothing is known except the partition count.
    internal class RandomPartition : PartitionInfo
    {
        private int m_count;

        internal RandomPartition(int count)
            : base(PartitionType.Random)
        {
            this.m_count = count;
        }

        internal override int Count
        {
            get { return this.m_count; }
            set { this.m_count = value; }
        }

        // A random partitioning is never partitioned by any key.
        internal override bool IsPartitionedBy(LambdaExpression keySel)
        {
            return false;
        }

        internal override bool IsPartitionedBy(LambdaExpression keySel, object comparer)
        {
            return false;
        }

        internal override bool IsPartitionedBy(LambdaExpression keySel, object comparer, bool isDescending)
        {
            return false;
        }

        // Two random partitionings are never considered the same.
        internal override bool IsSamePartition(PartitionInfo p)
        {
            return false;
        }

        // A random partitioning cannot be turned into a partition node.
        internal override DryadQueryNode CreatePartitionNode(LambdaExpression keySel, DryadQueryNode child)
        {
            throw new DryadLinqException(HpcLinqErrorCode.CannotCreatePartitionNodeRandom,
                                         SR.CannotCreatePartitionNodeRandom);
        }

        // The key selector is irrelevant for a random partitioning.
        internal override PartitionInfo Create(LambdaExpression keySel)
        {
            return this;
        }

        internal override PartitionInfo Rewrite(LambdaExpression resultSel, ParameterExpression param)
        {
            return this;
        }
    }

    // A range partitioning on a key of type TKey: the data is split at the
    // boundary values in the key array (when known) according to a comparer
    // and a direction.
    internal class RangePartition<TKey> : PartitionInfo
    {
        private int m_count;
        private LambdaExpression m_keySelector;
        private TKey[] m_partitionKeys;
        private IComparer<TKey> m_comparer;
        private bool m_isDescending;

        // Direction is derived from the keys; partitionKeys must not be null.
        internal RangePartition(LambdaExpression keySelector,
                                TKey[] partitionKeys,
                                IComparer<TKey> comparer)
            : this(keySelector, partitionKeys, comparer, null, 1)
        {
        }

        // keySelector   - the key selector expression.
        // partitionKeys - boundary keys, or null if they are not known.
        // comparer      - key comparer, normalized through TypeSystem.GetComparer.
        // isDescending  - explicit direction, or null to derive it from the keys.
        // parCnt        - partition count, used only when partitionKeys is null.
        //
        // Throws DryadLinqException when the direction has to be derived but
        // no keys were given, when the keys are not consistently ordered, or
        // when the explicit direction contradicts the order of the keys.
        internal RangePartition(LambdaExpression keySelector,
                                TKey[] partitionKeys,
                                IComparer<TKey> comparer,
                                bool? isDescending,
                                Int32 parCnt)
            : base(PartitionType.Range)
        {
            // N boundary keys delimit N + 1 partitions.
            this.m_count = (partitionKeys == null) ? parCnt : (partitionKeys.Length + 1);
            this.m_keySelector = keySelector;
            this.m_partitionKeys = partitionKeys;
            this.m_comparer = TypeSystem.GetComparer<TKey>(comparer);

            if (isDescending == null)
            {
                if (partitionKeys == null)
                {
                    throw new DryadLinqException(HpcLinqErrorCode.PartitionKeysNotProvided,
                                                 SR.PartitionKeysNotProvided);
                }

                bool? detectedIsDescending;
                if (!HpcLinqUtil.ComputeIsDescending(partitionKeys, m_comparer, out detectedIsDescending))
                {
                    throw new DryadLinqException(HpcLinqErrorCode.PartitionKeysAreNotConsistentlyOrdered,
                                                 SR.PartitionKeysAreNotConsistentlyOrdered);
                }

                // An undetermined direction is treated as ascending.
                this.m_isDescending = detectedIsDescending ?? false;
            }
            else
            {
                this.m_isDescending = isDescending.GetValueOrDefault();
                if (partitionKeys != null &&
                    !HpcLinqUtil.IsOrdered(partitionKeys, this.m_comparer, this.m_isDescending))
                {
                    throw new DryadLinqException(HpcLinqErrorCode.IsDescendingIsInconsistent,
                                                 SR.IsDescendingIsInconsistent);
                }
            }
        }

        // Range partitioning with unknown boundary keys and an explicit direction.
        internal RangePartition(LambdaExpression keySelector,
                                IComparer<TKey> comparer,
                                bool isDescending,
                                Int32 parCnt)
            : base(PartitionType.Range)
        {
            this.m_count = parCnt;
            this.m_keySelector = keySelector;
            this.m_partitionKeys = null;
            this.m_comparer = TypeSystem.GetComparer<TKey>(comparer);
            // Fix: the direction argument used to be dropped, leaving the
            // partition ascending regardless of what the caller asked for.
            this.m_isDescending = isDescending;
        }

        internal TKey[] Keys
        {
            get { return this.m_partitionKeys; }
        }

        internal Expression KeysExpression
        {
            get { return Expression.Constant(this.m_partitionKeys); }
        }

        internal Expression KeySelector
        {
            get { return this.m_keySelector; }
        }

        internal IComparer<TKey> Comparer
        {
            get { return this.m_comparer; }
        }

        internal override bool IsDescending
        {
            get { return this.m_isDescending; }
        }

        internal override bool HasKeys
        {
            get { return this.m_partitionKeys != null; }
        }

        internal override int Count
        {
            get { return this.m_count; }
            set { this.m_count = value; }
        }

        internal override bool IsPartitionedBy(LambdaExpression keySel)
        {
            // Match the key selector functions:
            if (this.m_keySelector == null)
            {
                return (keySel == null);
            }
            if (keySel == null)
            {
                return false;
            }
            return ExpressionMatcher.Match(this.m_keySelector, keySel);
        }

        internal override bool IsPartitionedBy(LambdaExpression keySel, object comp)
        {
            // Match the key selector functions:
            if (!this.IsPartitionedBy(keySel))
            {
                return false;
            }

            // Check the comparers:
            IComparer<TKey> comp1 = TypeSystem.GetComparer<TKey>(comp);
            if (comp1 == null)
            {
                return false;
            }
            return this.m_comparer.Equals(comp1);
        }

        internal override bool IsPartitionedBy(LambdaExpression keySel, object comp, bool isDescending)
        {
            // Match the key selector functions:
            if (!this.IsPartitionedBy(keySel))
            {
                return false;
            }

            // Check the comparers:
            IComparer<TKey> comp1 = TypeSystem.GetComparer<TKey>(comp);
            if (comp1 == null)
            {
                return false;
            }
            // When the requested direction differs from ours, compare
            // against the MinusComparer-wrapped form of the requested comparer.
            if (this.m_isDescending != isDescending)
            {
                comp1 = new MinusComparer<TKey>(comp1);
            }
            return this.m_comparer.Equals(comp1);
        }

        // True iff 'p' is a range partition on the same key type with known
        // keys that compare equal pairwise and a matching comparer.
        internal override bool IsSamePartition(PartitionInfo p)
        {
            RangePartition<TKey> p1 = p as RangePartition<TKey>;
            if (p1 == null)
            {
                return false;
            }

            // Check the keys:
            if (this.Keys == null ||
                p1.Keys == null ||
                this.Keys.Length != p1.Keys.Length)
            {
                return false;
            }

            IComparer<TKey> comp1 = TypeSystem.GetComparer<TKey>(p1.m_comparer);
            if (comp1 == null)
            {
                return false;
            }
            if (this.IsDescending != p1.IsDescending)
            {
                comp1 = new MinusComparer<TKey>(comp1);
            }

            for (int i = 0; i < this.Keys.Length; i++)
            {
                if (this.m_comparer.Compare(this.Keys[i], p1.Keys[i]) != 0)
                {
                    return false;
                }
            }

            // Check the comparers:
            // Fix: compare against the direction-adjusted comparer. It was
            // computed above but previously ignored, so two partitions with
            // opposite directions could be reported as the same.
            return this.m_comparer.Equals(comp1);
        }

        internal override DryadQueryNode CreatePartitionNode(LambdaExpression keySel, DryadQueryNode child)
        {
            Expression keysExpr = Expression.Constant(this.m_partitionKeys);
            Expression comparerExpr = Expression.Constant(this.m_comparer, typeof(IComparer<TKey>));
            Expression isDescendingExpr = Expression.Constant(this.m_isDescending);
            return new DryadRangePartitionNode(keySel, null, keysExpr, comparerExpr,
                                               isDescendingExpr, null, child.QueryExpression, child);
        }

        // Same keys, comparer, direction and count, but keyed by 'keySel'.
        // The key type is taken from the selector body.
        internal override PartitionInfo Create(LambdaExpression keySel)
        {
            Type keyType = keySel.Body.Type;
            return PartitionInfo.CreateRange(keySel, this.Keys, this.m_comparer,
                                             this.m_isDescending, this.Count, keyType);
        }

        // Rewrites the key selector through 'resultSel'. When the rewrite
        // yields no selector, the result falls back to a random partitioning
        // with the same count.
        internal override PartitionInfo Rewrite(LambdaExpression resultSel, ParameterExpression param)
        {
            ParameterExpression a = this.m_keySelector.Parameters[0];
            Substitution pSubst = Substitution.Empty.Cons(a, param);
            LambdaExpression newKeySel = HpcLinqExpression.Rewrite(this.m_keySelector, resultSel, pSubst);
            if (newKeySel == null)
            {
                return new RandomPartition(this.m_count);
            }
            return this.Create(newKeySel);
        }

        // Looks up HpcLinqQueryable.RangePartition for this source and key
        // type and pairs it with the argument expressions for this partition.
        // NOTE(review): dereferences m_partitionKeys, so this presumably
        // requires HasKeys to be true - confirm against callers.
        internal override Pair<MethodInfo, Expression[]> GetOperator()
        {
            Type sourceType = this.m_keySelector.Parameters[0].Type;
            MethodInfo operation = TypeSystem.FindStaticMethod(
                typeof(Microsoft.Research.DryadLinq.HpcLinqQueryable), "RangePartition",
                new Type[] { typeof(IQueryable<>).MakeGenericType(sourceType),
                             m_keySelector.GetType(),
                             m_partitionKeys.GetType(),
                             m_comparer.GetType(),
                             typeof(bool) },
                new Type[] { sourceType, typeof(TKey) });

            Expression[] arguments = new Expression[] {
                this.m_keySelector,
                Expression.Constant(this.m_partitionKeys),
                Expression.Constant(this.m_comparer, typeof(IComparer<TKey>)),
                Expression.Constant(this.m_isDescending) };

            return new Pair<MethodInfo, Expression[]>(operation, arguments);
        }
    }

    // A hash partitioning on a key of type TKey using an equality comparer.
    internal class HashPartition<TKey> : PartitionInfo
    {
        private int m_count;
        private LambdaExpression m_keySelector;
        private IEqualityComparer<TKey> m_comparer;

        internal HashPartition(LambdaExpression keySelector, int count)
            : this(keySelector, count, null)
        {
        }

        // A null eqComparer selects EqualityComparer<TKey>.Default.
        internal HashPartition(LambdaExpression keySelector, int count, IEqualityComparer<TKey> eqComparer)
            : base(PartitionType.Hash)
        {
            this.m_count = count;
            this.m_keySelector = keySelector;
            this.m_comparer = (eqComparer == null) ? EqualityComparer<TKey>.Default : eqComparer;
        }

        internal Expression KeySelector
        {
            get { return this.m_keySelector; }
        }

        internal IEqualityComparer<TKey> EqualityComparer
        {
            get { return this.m_comparer; }
        }

        internal override int Count
        {
            get { return this.m_count; }
            set { this.m_count = value; }
        }

        internal override bool IsPartitionedBy(LambdaExpression keySel)
        {
            // Match the key selector functions:
            if (this.m_keySelector == null)
            {
                return (keySel == null);
            }
            if (keySel == null)
            {
                return false;
            }
            return ExpressionMatcher.Match(this.m_keySelector, keySel);
        }

        internal override bool IsPartitionedBy(LambdaExpression keySel, object comp)
        {
            // Match the key selector functions:
            if (!this.IsPartitionedBy(keySel))
            {
                return false;
            }

            // Check the comparers:
            IEqualityComparer<TKey> comp1 = TypeSystem.GetEqualityComparer<TKey>(comp);
            if (comp1 == null)
            {
                return false;
            }
            return this.m_comparer.Equals(comp1);
        }

        // Direction is ignored for a hash partitioning.
        internal override bool IsPartitionedBy(LambdaExpression keySel, object comparer, bool isDescending)
        {
            return this.IsPartitionedBy(keySel, comparer);
        }

        // Same partitioning iff 'p' is a hash partition on the same key type
        // with the same count and an equal comparer. The key selectors are
        // not compared here.
        internal override bool IsSamePartition(PartitionInfo p)
        {
            HashPartition<TKey> p1 = p as HashPartition<TKey>;
            if (p1 == null || this.Count != p1.Count)
            {
                return false;
            }

            // Check the comparers:
            return this.m_comparer.Equals(p1.m_comparer);
        }

        internal override DryadQueryNode CreatePartitionNode(LambdaExpression keySel, DryadQueryNode child)
        {
            Expression comparerExpr = Expression.Constant(this.m_comparer, typeof(IEqualityComparer<TKey>));
            return new DryadHashPartitionNode(keySel, comparerExpr, this.Count, child.QueryExpression, child);
        }

        // Same count and comparer, but keyed by 'keySel'. The key type is
        // taken from the selector body.
        internal override PartitionInfo Create(LambdaExpression keySel)
        {
            Type keyType = keySel.Body.Type;
            return PartitionInfo.CreateHash(keySel, this.Count, this.m_comparer, keyType);
        }

        // Rewrites the key selector through 'resultSel'. When the rewrite
        // yields no selector, the result falls back to a random partitioning
        // with the same count.
        internal override PartitionInfo Rewrite(LambdaExpression resultSel, ParameterExpression param)
        {
            ParameterExpression a = this.m_keySelector.Parameters[0];
            Substitution pSubst = Substitution.Empty.Cons(a, param);
            LambdaExpression newKeySel = HpcLinqExpression.Rewrite(this.m_keySelector, resultSel, pSubst);
            if (newKeySel == null)
            {
                return new RandomPartition(this.m_count);
            }
            return this.Create(newKeySel);
        }

        // Looks up HpcLinqQueryable.HashPartition for this source and key
        // type and pairs it with the argument expressions for this partition.
        internal override Pair<MethodInfo, Expression[]> GetOperator()
        {
            Type sourceType = this.m_keySelector.Parameters[0].Type;
            MethodInfo operation = TypeSystem.FindStaticMethod(
                typeof(Microsoft.Research.DryadLinq.HpcLinqQueryable), "HashPartition",
                new Type[] { typeof(IQueryable<>).MakeGenericType(sourceType),
                             m_keySelector.GetType(),
                             m_comparer.GetType(),
                             typeof(int) },
                new Type[] { sourceType, typeof(TKey) });

            Expression[] arguments = new Expression[] {
                m_keySelector,
                Expression.Constant(this.m_comparer, typeof(IEqualityComparer<TKey>)),
                Expression.Constant(this.Count) };

            return new Pair<MethodInfo, Expression[]>(operation, arguments);
        }
    }

    // Ordering information for a dataset. This non-generic base class
    // represents "not ordered": every query answers false or null.
    internal class OrderByInfo
    {
        internal virtual bool IsOrdered
        {
            get { return false; }
        }

        internal virtual LambdaExpression KeySelector
        {
            get { return null; }
        }

        internal virtual Expression Comparer
        {
            get { return null; }
        }

        internal virtual bool IsDescending
        {
            get { return false; }
        }

        internal virtual bool IsOrderedBy(LambdaExpression keySel)
        {
            return false;
        }

        internal virtual bool IsOrderedBy(LambdaExpression keySel, object comparer)
        {
            return false;
        }

        internal virtual bool IsOrderedBy(LambdaExpression keySel, object comparer, bool isDescending)
        {
            return false;
        }

        internal virtual bool IsSameMonotoncity(PartitionInfo pinfo)
        {
            return false;
        }

        // Builds an OrderByInfo<keyType> via reflection, because the key
        // type is only available as a run-time Type.
        internal static OrderByInfo Create(Expression keySel, object comparer, bool isDescending, Type keyType)
        {
            Type infoType = typeof(OrderByInfo<>).MakeGenericType(keyType);
            object[] args = new object[] { keySel, comparer, isDescending };
            return (OrderByInfo)Activator.CreateInstance(
                infoType, BindingFlags.NonPublic | BindingFlags.Instance, null, args, null);
        }

        internal virtual OrderByInfo Create(LambdaExpression keySel)
        {
            return DataSetInfo.NoOrderBy;
        }

        internal virtual OrderByInfo Rewrite(LambdaExpression resultSel, ParameterExpression param)
        {
            return DataSetInfo.NoOrderBy;
        }
    }

    // Ordering by a key of type TKey with a comparer and a direction.
    internal class OrderByInfo<TKey> : OrderByInfo
    {
        private LambdaExpression m_keySelector;
        private IComparer<TKey> m_comparer;
        private bool m_isDescending;

        internal OrderByInfo(LambdaExpression keySelector, IComparer<TKey> comparer, bool isDescending)
        {
            this.m_keySelector = keySelector;
            this.m_comparer = TypeSystem.GetComparer<TKey>(comparer);
            this.m_isDescending = isDescending;
        }

        internal override LambdaExpression KeySelector
        {
            get { return this.m_keySelector; }
        }

        // The comparer wrapped in a constant expression typed IComparer<TKey>.
        internal override Expression Comparer
        {
            get { return Expression.Constant(this.m_comparer, typeof(IComparer<TKey>)); }
        }

        internal override bool IsDescending
        {
            get { return this.m_isDescending; }
        }

        internal override bool IsOrdered
        {
            get { return true; }
        }

        internal override bool IsOrderedBy(LambdaExpression keySel)
        {
            if (this.m_keySelector == null)
            {
                return (keySel == null);
            }
            if (keySel == null)
            {
                return false;
            }
            return ExpressionMatcher.Match(this.m_keySelector, keySel);
        }

        internal override bool IsOrderedBy(LambdaExpression keySel, object comp)
        {
            // Match the key selector functions:
            if (!this.IsOrderedBy(keySel))
            {
                return false;
            }

            // Check the comparers:
            IComparer<TKey> comp1 = TypeSystem.GetComparer<TKey>(comp);
            if (comp1 == null)
            {
                return false;
            }
            return this.m_comparer.Equals(comp1);
        }

        internal override bool IsOrderedBy(LambdaExpression keySel, object comp, bool isDescending)
        {
            // Match the key selector functions:
            if (!this.IsOrderedBy(keySel))
            {
                return false;
            }

            // Check the comparers:
            IComparer<TKey> comp1 = TypeSystem.GetComparer<TKey>(comp);
            if (comp1 == null)
            {
                return false;
            }
            // When the requested direction differs from ours, compare
            // against the MinusComparer-wrapped form of the requested comparer.
            if (this.IsDescending != isDescending)
            {
                comp1 = new MinusComparer<TKey>(comp1);
            }
            return this.m_comparer.Equals(comp1);
        }

        // True iff 'pinfo' is a range partition on the same key type whose
        // comparer, adjusted for any direction difference, equals ours.
        internal override bool IsSameMonotoncity(PartitionInfo pinfo)
        {
            RangePartition<TKey> pinfo1 = pinfo as RangePartition<TKey>;
            if (pinfo1 == null)
            {
                return false;
            }

            IComparer<TKey> comp1 = pinfo1.Comparer;
            if (this.m_isDescending != pinfo1.IsDescending)
            {
                comp1 = new MinusComparer<TKey>(comp1);
            }
            return this.m_comparer.Equals(comp1);
        }

        // Same comparer and direction, but keyed by 'keySel'. The key type
        // is taken from the selector body.
        internal override OrderByInfo Create(LambdaExpression keySel)
        {
            Type keyType = keySel.Body.Type;
            return OrderByInfo.Create(keySel, this.m_comparer, this.m_isDescending, keyType);
        }

        // Rewrites the key selector through 'resultSel'. When the rewrite
        // yields no selector, the ordering is reported as unknown.
        internal override OrderByInfo Rewrite(LambdaExpression resultSel, ParameterExpression param)
        {
            ParameterExpression a = this.m_keySelector.Parameters[0];
            Substitution pSubst = Substitution.Empty.Cons(a, param);
            LambdaExpression newKeySel = HpcLinqExpression.Rewrite(this.m_keySelector, resultSel, pSubst);
            if (newKeySel == null)
            {
                return DataSetInfo.NoOrderBy;
            }
            return this.Create(newKeySel);
        }
    }

    // Distinctness information for a dataset. This non-generic base class
    // represents "not known to be distinct": every query answers false.
    internal class DistinctInfo
    {
        internal virtual bool IsDistinct()
        {
            return false;
        }

        internal virtual bool IsDistinct(object comp)
        {
            return false;
        }

        internal virtual bool IsSameDistinct(DistinctInfo dist)
        {
            return false;
        }

        // Builds a DistinctInfo<type> via reflection, because the element
        // type is only available as a run-time Type.
        internal static DistinctInfo Create(object comparer, Type type)
        {
            Type infoType = typeof(DistinctInfo<>).MakeGenericType(type);
            object[] args = new object[] { comparer };
            return (DistinctInfo)Activator.CreateInstance(
                infoType, BindingFlags.NonPublic | BindingFlags.Instance, null, args, null);
        }
    }

    // The dataset is distinct with respect to an equality comparer on TKey.
    internal class DistinctInfo<TKey> : DistinctInfo
    {
        private IEqualityComparer<TKey> m_comparer;

        // The comparer wrapped in a constant expression typed IEqualityComparer<TKey>.
        internal Expression Comparer
        {
            get { return Expression.Constant(this.m_comparer, typeof(IEqualityComparer<TKey>)); }
        }

        // A null comparer selects EqualityComparer<TKey>.Default.
        internal DistinctInfo(IEqualityComparer<TKey> comparer)
        {
            this.m_comparer = (comparer == null) ? EqualityComparer<TKey>.Default : comparer;
        }

        internal override bool IsDistinct()
        {
            return true;
        }

        // True iff 'comp' resolves to an equality comparer equal to ours.
        internal override bool IsDistinct(object comp)
        {
            IEqualityComparer<TKey> comp1 = TypeSystem.GetEqualityComparer<TKey>(comp);
            if (comp1 == null)
            {
                return false;
            }
            return this.m_comparer.Equals(comp1);
        }

        // True iff 'dist' is a DistinctInfo on the same type with an equal comparer.
        internal override bool IsSameDistinct(DistinctInfo dist)
        {
            DistinctInfo<TKey> info = dist as DistinctInfo<TKey>;
            if (info == null)
            {
                return false;
            }
            // Fix: pass the comparer object itself. The Comparer property
            // returns an Expression wrapping the comparer, which is not an
            // equality comparer and so is the wrong thing to hand to
            // IsDistinct(object).
            return this.IsDistinct(info.m_comparer);
        }
    }
}