/* Copyright (c) Microsoft Corporation All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. See the Apache Version 2.0 License for specific language governing permissions and limitations under the License. */ // // � Microsoft Corporation. All rights reserved. // using System; using System.Collections; using System.Collections.Generic; using System.Text; using System.IO; using System.Linq; using System.Linq.Expressions; using System.Reflection; using System.Xml; using System.Diagnostics; using System.Xml.Linq; using System.Drawing; using System.Drawing.Drawing2D; using Microsoft.Research.DryadLinq.Internal; namespace Microsoft.Research.DryadLinq { //@@TODO[P1]: update for new APIs. /// /// This class explains in detail the generated plan. /// internal sealed class DryadQueryExplain { /// /// Visit the set of nodes in the query plan and build an explanation of the plan. /// /// Return plan description here. /// Nodes to explain. internal void CodeShowVisit(StringBuilder plan, DryadQueryNode[] nodes) { HashSet visited = new HashSet(); foreach (DryadQueryNode n in nodes) { CodeShowVisit(plan, n, visited); } } /// /// Helper for CodeShowVisit: do not revisit a node twice. /// /// Return plan here. /// Node to explain. /// Set of nodes already visited. private void CodeShowVisit(StringBuilder plan, DryadQueryNode n, HashSet visited) { if (visited.Contains(n)) return; visited.Add(n); foreach (DryadQueryNode c in n.Children) { CodeShowVisit(plan, c, visited); } ExplainNode(plan, n); } /// /// Explain one query node. /// /// Return plan here. /// Node to explain. internal static void ExplainNode(StringBuilder plan, DryadQueryNode n) { if (n is DryadTeeNode || n is DryadOutputNode) { return; } else if (n is DryadInputNode) { plan.AppendLine("Input:"); plan.Append("\t"); n.BuildString(plan); plan.AppendLine(); return; } plan.Append(n.m_vertexEntryMethod); plan.AppendLine(":"); HashSet allchildren = new HashSet(); if (n is DryadSuperNode) { DryadSuperNode sn = n as DryadSuperNode; List tovisit = new List(); tovisit.Add(sn.RootNode); while (tovisit.Count > 0) { DryadQueryNode t = tovisit[0]; tovisit.RemoveAt(0); if (!(t is DryadSuperNode)) allchildren.Add(t); foreach (DryadQueryNode tc in t.Children) { if (!allchildren.Contains(tc) && sn.Contains(tc)) tovisit.Add(tc); } } } else allchildren.Add(n); foreach (DryadQueryNode nc in allchildren.Reverse()) { Expression expression = null; // expression to print List additional = new List(); // additional arguments to print int argsToSkip = 0; string methodname = nc.OpName; plan.Append("\t"); if (nc is DryadMergeNode) { expression = ((DryadMergeNode)nc).ComparerExpression; } else if (nc is DryadHashPartitionNode) { DryadHashPartitionNode hp = (DryadHashPartitionNode)nc; expression = hp.KeySelectExpression; additional.Add(hp.NumberOfPartitions.ToString()); } else if (nc is DryadGroupByNode) { DryadGroupByNode gb = (DryadGroupByNode)nc; expression = gb.KeySelectExpression; if (gb.ElemSelectExpression != null) additional.Add(HpcLinqExpression.Summarize(gb.ElemSelectExpression)); if (gb.ResSelectExpression != null) additional.Add(HpcLinqExpression.Summarize(gb.ResSelectExpression)); if (gb.ComparerExpression != null) additional.Add(HpcLinqExpression.Summarize(gb.ComparerExpression)); if (gb.SeedExpression != null) additional.Add(HpcLinqExpression.Summarize(gb.SeedExpression)); if (gb.AccumulatorExpression != null) additional.Add(HpcLinqExpression.Summarize(gb.AccumulatorExpression)); } else if (nc is DryadOrderByNode) { DryadOrderByNode ob = (DryadOrderByNode)nc; expression = ob.KeySelectExpression; if (ob.ComparerExpression != null) additional.Add(HpcLinqExpression.Summarize(ob.ComparerExpression)); } else if (nc is DryadWhereNode) { expression = ((DryadWhereNode)nc).WhereExpression; } else if (nc is DryadSelectNode) { DryadSelectNode s = (DryadSelectNode)nc; expression = s.SelectExpression; if (s.ResultSelectExpression != null) additional.Add(HpcLinqExpression.Summarize(s.ResultSelectExpression)); } else if (nc is DryadAggregateNode) { DryadAggregateNode a = (DryadAggregateNode)nc; expression = a.FuncLambda; if (a.SeedExpression != null) additional.Add(HpcLinqExpression.Summarize(a.SeedExpression)); if (a.ResultLambda != null) additional.Add(HpcLinqExpression.Summarize(a.ResultLambda)); } else if (nc is DryadPartitionOpNode) { expression = ((DryadPartitionOpNode)nc).ControlExpression; } else if (nc is DryadJoinNode) { DryadJoinNode j = (DryadJoinNode)nc; expression = j.OuterKeySelectorExpression; additional.Add(HpcLinqExpression.Summarize(j.InnerKeySelectorExpression)); additional.Add(HpcLinqExpression.Summarize(j.ResultSelectorExpression)); if (j.ComparerExpression != null) additional.Add(HpcLinqExpression.Summarize(j.ComparerExpression)); } else if (nc is DryadDistinctNode) { expression = ((DryadDistinctNode)nc).ComparerExpression; } else if (nc is DryadContainsNode) { DryadContainsNode c = (DryadContainsNode)nc; expression = c.ValueExpression; if (c.ComparerExpression != null) additional.Add(HpcLinqExpression.Summarize(c.ComparerExpression)); } else if (nc is DryadBasicAggregateNode) { expression = ((DryadBasicAggregateNode)nc).SelectExpression; } else if (nc is DryadConcatNode) // nothing to do { } else if (nc is DryadSetOperationNode) { expression = ((DryadSetOperationNode)nc).ComparerExpression; } else if (nc is DryadRangePartitionNode) { DryadRangePartitionNode r = (DryadRangePartitionNode)nc; expression = r.CountExpression; // TODO: there's some other possible interesting info } else if (nc is DryadApplyNode) { expression = ((DryadApplyNode)nc).LambdaExpression; } else if (nc is DryadForkNode) { expression = ((DryadForkNode)nc).ForkLambda; } else if (nc is DryadTeeNode) { // nothing } else if (nc is DryadDynamicNode) { // nothing } else { expression = nc.QueryExpression; } if (expression is MethodCallExpression) { MethodCallExpression mc = (MethodCallExpression)expression; methodname = mc.Method.Name; // overwrite methodname // determine which arguments to skip #region LINQMETHODS switch (mc.Method.Name) { case "Aggregate": case "AggregateAsQuery": case "Select": case "LongSelect": case "SelectMany": case "LongSelectMany": case "OfType": case "Where": case "LongWhere": case "First": case "FirstOrDefault": case "FirstAsQuery": case "Single": case "SingleOrDefault": case "SingleAsQuery": case "Last": case "LastOrDefault": case "LastAsQuery": case "Distinct": case "Any": case "AnyAsQuery": case "All": case "AllAsQuery": case "Count": case "CountAsQuery": case "LongCount": case "LongCountAsQuery": case "Sum": case "SumAsQuery": case "Min": case "MinAsQuery": case "Max": case "MaxAsQuery": case "Average": case "AverageAsQuery": case "GroupBy": case "OrderBy": case "OrderByDescending": case "ThenBy": case "ThenByDescending": case "Take": case "TakeWhile": case "LongTakeWhile": case "Skip": case "SkipWhile": case "LongSkipWhile": case "Contains": case "ContainsAsQuery": case "Reverse": case "Merge": case "HashPartition": case "RangePartition": case "Fork": case "ForkChoose": case "AssumeHashPartition": case "AssumeRangePartition": case "AssumeOrderBy": case "ToPartitionedTableLazy": case "AddCacheEntry": case "SlidingWindow": case "SelectWithPartitionIndex": case "ApplyWithPartitionIndex": argsToSkip = 1; break; case "Join": case "GroupJoin": case "Concat": case "MultiConcat": case "Union": case "Intersect": case "Except": case "SequenceEqual": case "SequenceEqualAsQuery": case "Zip": argsToSkip = 2; break; case "Apply": case "ApplyPerPartition": if (mc.Arguments.Count < 3) argsToSkip = 1; else argsToSkip = 2; break; default: throw DryadLinqException.Create(HpcLinqErrorCode.OperatorNotSupported, String.Format(SR.OperatorNotSupported, mc.Method.Name), expression); } #endregion plan.Append(methodname); plan.Append("("); int argno = 0; foreach (var arg in mc.Arguments) { argno++; if (argno <= argsToSkip) continue; if (argno > argsToSkip + 1) { plan.Append(","); } plan.Append(HpcLinqExpression.Summarize(arg)); } plan.AppendLine(")"); } else { // expression is not methodcall plan.Append(methodname); plan.Append("("); if (expression != null) { plan.Append(HpcLinqExpression.Summarize(expression)); } foreach (string e in additional) { plan.Append(","); plan.Append(e); } plan.AppendLine(")"); } } } /// /// Explain a query plan in terms of elementary operations. /// /// Query generator. /// A string explaining the plan. internal string Explain(HpcLinqQueryGen gen) { StringBuilder plan = new StringBuilder(); gen.CodeGenVisit(); this.CodeShowVisit(plan, gen.QueryPlan()); return plan.ToString(); } } /// /// Summary information about a job query plan. /// internal class DryadLinqJobStaticPlan { /// /// Connection between two stages. /// public class Connection { /// /// Arity of connection. /// public enum ConnectionType { /// /// Point-to-point connection between two stages. /// PointToPoint, /// /// Cross-product connection between two stages. /// AllToAll }; /// /// Type of channel backing the connection. /// public enum ChannelType { /// /// Persistent file. /// DiskFile, /// /// In-memory fifo. /// Fifo, /// /// TCP pipe. /// TCP } /// /// Stage originating the connection. /// public Stage From { internal set; get; } /// /// Stage terminating the connection. /// public Stage To { internal set; get; } /// /// Type of connection. /// public ConnectionType Arity { get; internal set; } /// /// Type of channel backing the connection. /// public ChannelType ChannelKind { get; internal set; } /// /// Dynamic manager associated with the connection. /// public string ConnectionManager { get; internal set; } /// /// Color used to represent the connection. /// /// A string describing the color. public string Color() { switch (this.ChannelKind) { case ChannelType.DiskFile: return "black"; case ChannelType.Fifo: return "red"; case ChannelType.TCP: return "yellow"; default: throw new Exception(String.Format(SR.UnknownChannelType, this.ChannelKind.ToString())); } } } /// /// Per-node connection information (should be per-edge...) /// struct ConnectionInformation { /// /// Type of connection. /// public Connection.ConnectionType Arity { get; internal set; } /// /// Type of channel backing the connection. /// public Connection.ChannelType ChannelKind { get; internal set; } /// /// Dynamic manager associated with the connection. /// public string ConnectionManager { get; internal set; } } /// /// Information about a stage. /// public class Stage { /// /// Stage name. /// public string Name { get; internal set; } /// /// Code executed in the stage. /// public string[] Code { get; internal set; } /// /// DryadLINQ operator implemented by the stage. /// public string Operator { get; internal set; } /// /// Number of vertices in stage. /// public int Replication { get; internal set; } /// /// Unique identifier. /// public int Id { get; set; } /// /// True if the stage is an input. /// public bool IsInput { get; internal set; } /// /// True if the stage is an output. /// public bool IsOutput { get; internal set; } /// /// True if the stage is a tee. /// public bool IsTee { get; internal set; } /// /// True if the stage is a concatenation. /// public bool IsConcat { get; internal set; } /// /// True if the stage is virtual (no real vertices synthesized). /// public bool IsVirtual { get { return this.IsInput || this.IsOutput || this.IsTee || this.IsConcat; } } /// /// Only defined for tables. /// public string Uri { get; internal set; } /// /// Only defined for tables. /// public string UriType { get; internal set; } } /// /// File containing the plan. /// string xmlPlanFile; /// /// Map from stage id to stage. /// Dictionary stages; /// /// List of inter-stage connections in the plan. /// List connections; /// /// Store here per-node connection information (map from node id). /// Dictionary perNodeConnectionInfo; /// /// Create a dryadlinq job plan starting from an xml plan file. /// /// Plan file to parse. public DryadLinqJobStaticPlan(string xmlPlanFile) { this.stages = new Dictionary(); this.connections = new List(); this.perNodeConnectionInfo = new Dictionary(); this.xmlPlanFile = xmlPlanFile; this.ParseQueryPlan(); } /// /// Parse an XML query plan and represent that information. /// private void ParseQueryPlan() { if (!File.Exists(this.xmlPlanFile)) throw new Exception(String.Format( SR.CannotReadQueryPlan , this.xmlPlanFile)); XDocument plan = XDocument.Load(this.xmlPlanFile); XElement query = plan.Root.Elements().Where(e => e.Name == "QueryPlan").First(); IEnumerable vertices = query.Elements().Where(e => e.Name == "Vertex"); foreach (XElement v in vertices) { Stage stage = new Stage(); stage.Id = int.Parse(v.Element("UniqueId").Value); stage.Replication = int.Parse(v.Element("Partitions").Value); stage.Operator = v.Element("Type").Value; stage.Name = v.Element("Name").Value; { string code = v.Element("Explain").Value; stage.Code = code.Split('\n'). Skip(1). // drop stage name Select(l => l.Trim()). // remove leading tab ToArray(); } this.stages.Add(stage.Id, stage); { // These should be connection attributes, not stage attributes. string cht = v.Element("ChannelType").Value; string connectionManager = v.Element("DynamicManager").Element("Type").Value; string connection = v.Element("ConnectionOperator").Value; ConnectionInformation info = new ConnectionInformation(); info.ConnectionManager = connectionManager; switch (connection) { case "Pointwise": info.Arity = Connection.ConnectionType.PointToPoint; break; case "CrossProduct": info.Arity = Connection.ConnectionType.AllToAll; break; default: throw new Exception(String.Format( SR.UnknownConnectionType , connection)); } switch (cht) { case "DiskFile": info.ChannelKind = Connection.ChannelType.DiskFile; break; case "TCPPipe": info.ChannelKind = Connection.ChannelType.TCP; break; case "MemoryFIFO": info.ChannelKind = Connection.ChannelType.Fifo; break; default: throw new Exception(String.Format( SR.UnknownChannelType2 , cht)); } this.perNodeConnectionInfo.Add(stage.Id, info); } switch (stage.Operator) { case "InputTable": stage.IsInput = true; stage.UriType = v.Element("StorageSet").Element("Type").Value; stage.Uri = v.Element("StorageSet").Element("SourceURI").Value; break; case "OutputTable": stage.IsOutput = true; stage.UriType = v.Element("StorageSet").Element("Type").Value; stage.Uri = v.Element("StorageSet").Element("SinkURI").Value; break; case "Tee": stage.IsTee = true; break; case "Concat": stage.IsConcat = true; break; default: break; } if (v.Elements("Children").Count() == 0) continue; bool first = true; IEnumerable children = v.Element("Children").Elements().Where(e => e.Name == "Child"); foreach (XElement child in children) { // This code parallels the graphbuilder.cpp for XmlExecHost Connection conn = new Connection(); int fromid = int.Parse(child.Element("UniqueId").Value); ConnectionInformation fromConnectionInformation = this.perNodeConnectionInfo[fromid]; Stage from = this.stages[fromid]; conn.From = from; conn.To = stage; conn.ChannelKind = fromConnectionInformation.ChannelKind; switch (fromConnectionInformation.ConnectionManager) { case "FullAggregator": case "HashDistributor": case "RangeDistributor": // Ignore except first child if (first) { first = false; conn.ConnectionManager = fromConnectionInformation.ConnectionManager; } else { conn.ConnectionManager = ""; } break; case "PartialAggregator": case "Broadcast": // All children have the same connection manager conn.ConnectionManager = fromConnectionInformation.ConnectionManager; break; case "Splitter": // The connection manager depends on the number of children if (first) { first = false; if (children.Count() == 1) conn.ConnectionManager = fromConnectionInformation.ConnectionManager; else conn.ConnectionManager = "SemiSplitter"; } else { conn.ConnectionManager = ""; } break; case "None": case "": break; } conn.Arity = fromConnectionInformation.Arity; this.connections.Add(conn); } } } /// /// Find the stage given the stage id as a string. /// /// Stage id. /// A handle to the stage with the specified static Id. public Stage GetStageByStaticId(string stageId) { int id = int.Parse(stageId); return this.stages[id]; } /// /// Find the stage given the stage name. /// /// Name of stage to return. /// The stage with the given name or null. public Stage GetStageByName(string name) { foreach (Stage s in this.stages.Values) { if (s.Name.Equals(name)) return s; } return null; } /// /// The list of all stages in the plan. /// /// An iterator over the list of stages. public IEnumerable GetAllStages() { return this.stages.Values; } /// /// The list of all connections in the plan. /// /// An iterator over a list of connections. public IEnumerable GetAllConnections() { return this.connections; } } }