Dryad/linqtodryadjm_managed_yarn/QueryParser.cs

429 lines
19 KiB
C#

/*
Copyright (c) Microsoft Corporation
All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0
THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.
See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.IO;
using System.Reflection;
using System.Data.Linq;
using System.Data.Linq.Mapping;
using System.Xml;
using System.Xml.Serialization;
using Microsoft.Research.Dryad;
namespace linqtodryadjm_managed
{
public class QueryPlanParser
{
public VertexInfo.IOType GetIoType(string type)
{
if (type == "File") return VertexInfo.IOType.FILELIST;
if (type == "PartitionedFile") return VertexInfo.IOType.PARTITIONEDFILE;
if (type == "FileDirectory") return VertexInfo.IOType.FILEDIRECTORY;
if (type == "FileWildcard") return VertexInfo.IOType.FILEWILDCARD;
if (type == "TidyFS") return VertexInfo.IOType.STREAM;
if (type == "Dsc") return VertexInfo.IOType.STREAM;
if (type == "Hdfs") return VertexInfo.IOType.HDFS_STREAM;
if (type == "FilePrefix") return VertexInfo.IOType.FILEPREFIX;
throw new LinqToDryadException(String.Format("Unknown IoType: {0}", type));
}
public Vertex.Type GetVertexType(string type)
{
if (type == "InputTable") return Vertex.Type.INPUTTABLE;
if (type == "OutputTable") return Vertex.Type.OUTPUTTABLE;
if (type == "Where") return Vertex.Type.WHERE;
if (type == "Join") return Vertex.Type.JOIN;
if (type == "Fork") return Vertex.Type.FORK;
if (type == "Tee") return Vertex.Type.TEE;
if (type == "Concat") return Vertex.Type.CONCAT;
if (type == "Super") return Vertex.Type.SUPER;
if (type == "Apply") return Vertex.Type.SUPER;
return Vertex.Type.UNKNOWN;
}
public Predecessor.ChannelType GetChannelType(string type)
{
if (type == "DiskFile") return Predecessor.ChannelType.DISKFILE;
if (type == "TCPPipe") return Predecessor.ChannelType.TCPPIPE;
if (type == "MemoryFIFO") return Predecessor.ChannelType.MEMORYFIFO;
throw new LinqToDryadException(String.Format("Unknown ChannelType: {0}", type));
}
public Predecessor.ConnectionOperator GetConnectionOperator(string type)
{
if (type == "Pointwise") return Predecessor.ConnectionOperator.POINTWISE;
if (type == "CrossProduct") return Predecessor.ConnectionOperator.CROSSPRODUCT;
throw new LinqToDryadException(String.Format("Unknown ConnectionOperator: {0}", type));
}
public Predecessor.AffinityConstraint GetAffinityConstraint(string type)
{
if (type == "UseDefault") return Predecessor.AffinityConstraint.UseDefault;
if (type == "HardConstraint") return Predecessor.AffinityConstraint.HardConstraint;
if (type == "OptimizationConstraint") return Predecessor.AffinityConstraint.OptimizationConstraint;
if (type == "Preference") return Predecessor.AffinityConstraint.Preference;
if (type == "DontCare") return Predecessor.AffinityConstraint.DontCare;
throw new LinqToDryadException(String.Format("Unknown AffinityConstraint: {0}", type));
}
public DynamicManager.Type GetDynamicManagerType(string type)
{
if (type == "None") return DynamicManager.Type.NONE;
if (type == "Splitter") return DynamicManager.Type.SPLITTER;
if (type == "PartialAggregator") return DynamicManager.Type.PARTIALAGGR;
if (type == "FullAggregator") return DynamicManager.Type.FULLAGGR;
if (type == "HashDistributor") return DynamicManager.Type.HASHDISTRIBUTOR;
if (type == "RangeDistributor") return DynamicManager.Type.RANGEDISTRIBUTOR;
if (type == "Broadcast") return DynamicManager.Type.BROADCAST;
throw new LinqToDryadException(String.Format("Unknown DynamicManager: {0}", type));
}
public static bool SplitEntryIntoAssemblyClassMethod(string entry, out string _assembly, out string _class, out string _method)
{
_assembly = "";
_class = "";
_method = "";
int indexBang = entry.IndexOf("!");
int indexPeriod = entry.LastIndexOf(".");
if (indexBang == -1 || indexPeriod == -1 || indexPeriod <= indexBang)
{
return false;
}
_assembly = entry.Substring(0, indexBang);
_class = entry.Substring(indexBang + 1, indexPeriod - indexBang - 1);
_method = entry.Substring(indexPeriod + 1);
return true;
}
private void ParseQueryXmlLinqToDryad(XmlDocument queryPlanDoc, Query query)
{
XmlElement root = queryPlanDoc.DocumentElement;
//
// Query globals
//
query.queryPlan = new SortedDictionary<int, Vertex>();
query.compilerversion = root.SelectSingleNode("DryadLinqVersion").InnerText;
query.clusterName = root.SelectSingleNode("ClusterName").InnerText;
query.visualization = root.SelectSingleNode("Visualization").InnerText;
// Compression scheme for intermediate data
XmlNode compressionNode = root.SelectSingleNode("IntermediateDataCompression");
if (compressionNode != null)
{
query.intermediateDataCompression = Convert.ToInt32(compressionNode.InnerText);
}
//
// XmlExecHost arguments
//
XmlNodeList nodes = root.SelectSingleNode("XmlExecHostArgs").ChildNodes;
query.xmlExecHostArgs = new string[nodes.Count];
for (int index=0; index<nodes.Count; index++)
{
query.xmlExecHostArgs[index] = nodes[index].InnerText;
}
//
// Get Speculative duplication flag - default is enabled (true)
//
XmlNode duplicationNode = root.SelectSingleNode("EnableSpeculativeDuplication");
if (duplicationNode != null)
{
bool dupFlag;
if (bool.TryParse(duplicationNode.InnerText, out dupFlag))
{
query.enableSpeculativeDuplication = dupFlag;
}
}
nodes = root.SelectSingleNode("QueryPlan").ChildNodes;
//
// Need to remember the conection operator for use when the
// predecessors are being created.
//
string[] vertexConnectionOperator = new string[nodes.Count];
for (int index=0; index<nodes.Count; index++)
{
vertexConnectionOperator[index] = "";
}
for (int index=0; index<nodes.Count; index++)
{
Vertex vertex = new Vertex();
//
// Vertex globals
//
string uniqueId = nodes[index].SelectSingleNode("UniqueId").InnerText;
string vertexType = nodes[index].SelectSingleNode("Type").InnerText;
string name = nodes[index].SelectSingleNode("Name").InnerText;
string partitions = nodes[index].SelectSingleNode("Partitions").InnerText;
string channelType = nodes[index].SelectSingleNode("ChannelType").InnerText;
//
// Need to remember the conection operator for use when the
// predecessors are being created.
//
vertexConnectionOperator[index] = nodes[index].SelectSingleNode("ConnectionOperator").InnerText;
vertex.uniqueId = Convert.ToInt32(uniqueId);
vertex.name = name;
vertex.type = GetVertexType(vertexType);
vertex.partitions = Convert.ToInt32(partitions);
XmlNode dynamicManager = nodes[index].SelectSingleNode("DynamicManager");
string dmType = dynamicManager.SelectSingleNode("Type").InnerText;
vertex.dynamicManager = new DynamicManager();
vertex.dynamicManager.type = GetDynamicManagerType(dmType);
if (vertex.dynamicManager.type == DynamicManager.Type.FULLAGGR)
{
string levels = dynamicManager.SelectSingleNode("AggregationLevels").InnerText;
vertex.dynamicManager.aggregationLevels = Convert.ToInt32(levels);
}
if (vertex.dynamicManager.type == DynamicManager.Type.RANGEDISTRIBUTOR)
{
string sampleRate = dynamicManager.SelectSingleNode("SampleRate").InnerText;
string vertexId = dynamicManager.SelectSingleNode("VertexId").InnerText;
vertex.dynamicManager.sampleRate = Convert.ToDouble(sampleRate);
vertex.dynamicManager.splitVertexId = Convert.ToInt32(vertexId);
}
else
{
XmlNodeList entries = dynamicManager.SelectNodes("Entry");
vertex.dynamicManager.assemblyNames = new string[entries.Count];
vertex.dynamicManager.classNames = new string[entries.Count];
vertex.dynamicManager.methodNames = new string[entries.Count];
for (int entryIndex = 0; entryIndex < entries.Count; entryIndex++)
{
vertex.dynamicManager.assemblyNames[entryIndex] = entries[entryIndex].SelectSingleNode("AssemblyName").InnerText;
vertex.dynamicManager.classNames[entryIndex] = entries[entryIndex].SelectSingleNode("ClassName").InnerText;
vertex.dynamicManager.methodNames[entryIndex] = entries[entryIndex].SelectSingleNode("MethodName").InnerText;
}
}
if (vertex.type == Vertex.Type.INPUTTABLE)
{
XmlNode storageSet = nodes[index].SelectSingleNode("StorageSet");
string ioType = storageSet.SelectSingleNode("Type").InnerText;
vertex.info = new VertexInfo();
vertex.info.ioType = GetIoType(ioType);
XmlNodeList storageUris = storageSet.SelectNodes("SourceURI");
vertex.info.sources = new string[storageUris.Count];
for (int indexStorageUri=0; indexStorageUri<storageUris.Count; indexStorageUri++)
{
vertex.info.sources[indexStorageUri] = storageUris[indexStorageUri].InnerText;
}
}
else if ( vertex.type == Vertex.Type.OUTPUTTABLE )
{
XmlNode storageSet = nodes[index].SelectSingleNode("StorageSet");
string ioType = storageSet.SelectSingleNode("Type").InnerText;
vertex.info = new VertexInfo();
vertex.info.ioType = GetIoType(ioType);
string source = storageSet.SelectSingleNode("SinkURI").InnerText;
vertex.info.sources = new string[1] { source };
if (vertex.info.ioType == VertexInfo.IOType.PARTITIONEDFILE )
{
vertex.info.partitionUncPath = storageSet.SelectSingleNode("PartitionUncPath").InnerText;
}
XmlNode temporary = storageSet.SelectSingleNode("IsTemporary");
if (temporary != null)
{
if (bool.TryParse(temporary.InnerXml, out vertex.info.isTemporary) == false)
{
throw new LinqToDryadException(String.Format("Invalid value for IsTemporary: {0}", temporary.InnerXml));
}
}
else
{
vertex.info.isTemporary = false;
}
XmlNode recordType = storageSet.SelectSingleNode("RecordType");
if (recordType != null)
{
vertex.info.recordType = recordType.InnerXml;
}
XmlNode outputCompressionScheme = storageSet.SelectSingleNode("OutputCompressionScheme");
if (outputCompressionScheme != null)
{
if (int.TryParse(outputCompressionScheme.InnerXml, out vertex.info.compressionScheme) == false)
{
throw new LinqToDryadException(String.Format("Invalid value for OutputCompressionScheme: {0}", outputCompressionScheme.InnerXml));
}
}
else
{
vertex.info.compressionScheme = 0; // TODO: Change to Enum
}
}
else /* JOIN etc. */
{
XmlNode entry = nodes[index].SelectSingleNode("Entry");
vertex.info = new VertexInfo();
vertex.info.assemblyName = entry.SelectSingleNode("AssemblyName").InnerText;
vertex.info.className = entry.SelectSingleNode("ClassName").InnerText;
vertex.info.methodName = entry.SelectSingleNode("MethodName").InnerText;
}
//
// everybody except inputs have children
//
if (vertex.type != Vertex.Type.INPUTTABLE)
{
XmlNodeList children = nodes[index].SelectSingleNode("Children").ChildNodes;
vertex.info.predecessors = new Predecessor[children.Count];
for (int indexChild = 0; indexChild < children.Count; indexChild++)
{
int childId = Convert.ToInt32(children[indexChild].SelectSingleNode("UniqueId").InnerText);
string childConstraint = children[indexChild].SelectSingleNode("AffinityConstraint").InnerText;
vertex.info.predecessors[indexChild] = new Predecessor();
vertex.info.predecessors[indexChild].uniqueId = childId;
vertex.info.predecessors[indexChild].connectionOperator = GetConnectionOperator(vertexConnectionOperator[childId]);
vertex.info.predecessors[indexChild].channelType = GetChannelType(channelType);
vertex.info.predecessors[indexChild].constraint = GetAffinityConstraint(childConstraint);
}
}
//
// In this parser the only way to have an optional Tag is immediately before consuming
// a close of an outer tag. So we have to look for machines tag here.
//
if ((vertex.type != Vertex.Type.INPUTTABLE) && (vertex.type != Vertex.Type.OUTPUTTABLE))
{
XmlNode machinesRoot = nodes[index].SelectSingleNode("Machines");
if (machinesRoot != null)
{
XmlNodeList machines = machinesRoot.ChildNodes;
vertex.machines = new string[machines.Count];
for (int indexMachine = 0; indexMachine < machines.Count; indexMachine++)
{
vertex.machines[indexMachine] = machines[indexMachine].SelectSingleNode("Machine").InnerText;
}
}
}
//
// Add the vertex
//
query.queryPlan.Add(vertex.uniqueId, vertex);
}
}
public bool ParseQueryXml(string queryPlanFileName, Query query)
{
XmlNode version = null;
XmlDocument queryPlanDoc = new XmlDocument();
//
// Load query plan document
//
try
{
queryPlanDoc.Load(queryPlanFileName);
}
catch (Exception e)
{
DryadLogger.LogCritical(0, e, "Failed to load query plan: {0}", queryPlanFileName);
return false;
}
//
// Get DryadLinqVersion - absence used to indicate Argentia query plan
//
try
{
version = queryPlanDoc.DocumentElement.SelectSingleNode("DryadLinqVersion");
}
catch (System.Xml.XPath.XPathException e)
{
DryadLogger.LogCritical(0, e, "Failed to select node DryadLinqVersion from query plan: {0}", queryPlanFileName);
return false;
}
if (version == null)
{
DryadLogger.LogCritical(0, null, "Missing element 'DryadLinqVersion' in query plan: {0}", queryPlanFileName);
return false;
}
//
// Parse query plan XML doc into Query
//
try
{
ParseQueryXmlLinqToDryad(queryPlanDoc, query);
}
catch (Exception e)
{
DryadLogger.LogCritical(0, e, "Failed to parse query plan: {0}", queryPlanFileName);
return false;
}
#if REMOVE_ARGENTIA
else // If (version == null), Argentia query plan
{
// Add the namespace.
XmlNamespaceManager nsmgr = new XmlNamespaceManager(queryPlanDoc.NameTable);
nsmgr.AddNamespace("qp", "http://microsoft.com/PCP/Argentia/QueryPlan.xsd");
version = queryPlanDoc.DocumentElement.SelectSingleNode("qp:RuntimeVersion", nsmgr);
if (version != null)
{
ParseQueryXmlArgentia(queryPlanDoc, query);
}
else
{
DryadLogger.LogCritical(0, null, "Unknown query plan format.");
return false;
}
}
#endif
return true;
}
}
}