/*
Copyright (c) Microsoft Corporation

All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0

THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR
IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A
PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.

See the Apache Version 2.0 License for specific language governing permissions and limitations
under the License.
*/

using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Linq;
using System.Text;
using System.Runtime.CompilerServices;

[assembly: InternalsVisibleTo("DistributedDandelion")]

namespace Microsoft.Research.DryadLinq
{
    /// <summary>
    /// Configuration information for a HPC Query.
    /// </summary>
    /// <remarks>
    /// Instances are mutable until a read-only copy is produced by <c>MakeImmutableCopy</c>;
    /// on a read-only instance every setter except <see cref="JobFriendlyName"/> throws
    /// <see cref="System.NotSupportedException"/>.
    /// </remarks>
    public sealed class HpcLinqConfiguration
    {
        internal bool _isReadOnly;

        private HpcLinqStringList _resourcesToAdd = new HpcLinqStringList();
        private HpcLinqStringList _resourcesToRemove = new HpcLinqStringList();
        private DscCompressionScheme _intermediateDataCompressionScheme = DscCompressionScheme.Gzip;
        private DscCompressionScheme _outputCompressionScheme = DscCompressionScheme.None;
        private bool _compileForVertexDebugging = false; // Ship PDBs + No optimization
        private string _headNode;
        private string _hdfsNameNode;
        private int _hdfsNameNodeHttpPort = 8033; //TODO - Read Config
        private string _jobFriendlyName;
        private int? _jobMinNodes;
        private int? _jobMaxNodes;
        private string _nodeGroup;
        private int? _jobRuntimeLimit;
        private bool _localDebug = false;

        // Not exposed through any property in this class; it is only carried over by MakeImmutableCopy.
        private bool _orderPreserving = true;

        private string _jobUsername = null;
        private string _jobPassword = null;
        private HpcQueryTraceLevel _runtimeTraceLevel = HpcQueryTraceLevel.Error;

        // Only surfaced as a property when YARN_MISSING_FEATURE is defined, but always copied.
        private string _graphManagerNode;

        private bool _enableSpeculativeDuplication = false;
        private HpcLinqStringDictionary _jobEnvironmentVariables = new HpcLinqStringDictionary();
        private bool _selectAndWherePreserveOrder = false;
        private bool _matchClientNetFrameworkVersion = false;
        private bool _multiThreading = true;

        // Set these values using YARN_HOME and DRYAD_HOME environment variables.
        // The field initializers run for every constructor, so no separate init step is needed.
        private string _yarnHome = Environment.GetEnvironmentVariable("YARN_HOME");
        private string _dryadHome = Environment.GetEnvironmentVariable("DRYAD_HOME");

        /// <summary>
        /// Throws <see cref="System.NotSupportedException"/> when this instance is read-only.
        /// Called at the top of every guarded property setter.
        /// </summary>
        private void ThrowIfReadOnly()
        {
            if (_isReadOnly)
            {
                throw new NotSupportedException(SR.ConfigReadonly);
            }
        }

        /// <summary>
        /// Gets the value indicating whether the HpcLinqConfiguration is read-only.
        /// </summary>
        /// <remarks>
        /// When <see cref="IsReadOnly"/> is true, every property except <see cref="JobFriendlyName"/>
        /// will throw a <see cref="System.NotSupportedException"/> from its setter.
        /// </remarks>
        public bool IsReadOnly
        {
            get { return _isReadOnly; }
        }

        /// <summary>
        /// Gets or sets the compression scheme applied to data passed between stages in a HPC Query.
        /// </summary>
        /// <remarks>
        /// The default is <see cref="DscCompressionScheme.Gzip"/>.
        /// </remarks>
        public DscCompressionScheme IntermediateDataCompressionScheme
        {
            get { return _intermediateDataCompressionScheme; }
            set
            {
                ThrowIfReadOnly();
                _intermediateDataCompressionScheme = value;
            }
        }

        /// <summary>
        /// Gets or sets the value specifying the compression scheme for output data.
        /// </summary>
        /// <remarks>
        /// The default is <see cref="DscCompressionScheme.None"/>.
        /// </remarks>
        public DscCompressionScheme OutputDataCompressionScheme
        {
            get { return _outputCompressionScheme; }
            set
            {
                ThrowIfReadOnly();
                _outputCompressionScheme = value;
            }
        }

        /// <summary>
        /// Gets or sets the value specifying whether to compile code that supports debugging vertex tasks
        /// that execute on a HPC Server cluster.
        /// </summary>
        /// <remarks>
        /// If true, vertex code will be compiled with no code-level optimizations and a PDB will be generated.
        /// Also, the query execution job will look for and include the PDB associated with every DLL resource
        /// that is part of the submitted job.
        /// The default is false.
        /// </remarks>
        public bool CompileForVertexDebugging
        {
            get { return _compileForVertexDebugging; }
            set
            {
                ThrowIfReadOnly();
                _compileForVertexDebugging = value;
            }
        }

        /// <summary>
        /// Gets or sets the bin directory for Dryad.
        /// </summary>
        /// <remarks>
        /// The initial value is read from the DRYAD_HOME environment variable.
        /// </remarks>
        public string DryadHomeDirectory
        {
            get { return _dryadHome; }
            set
            {
                ThrowIfReadOnly();
                _dryadHome = value;
            }
        }

        /// <summary>
        /// Gets or sets the home directory for Yarn.
        /// </summary>
        /// <remarks>
        /// The initial value is read from the YARN_HOME environment variable.
        /// </remarks>
        public string YarnHomeDirectory
        {
            get { return _yarnHome; }
            set
            {
                ThrowIfReadOnly();
                _yarnHome = value;
            }
        }

        /// <summary>
        /// Gets or sets the head node for the HPC Server used to execute the HPC Query job.
        /// </summary>
        public string HeadNode
        {
            get { return _headNode; }
            set
            {
                ThrowIfReadOnly();
                _headNode = value;
            }
        }

        /// <summary>
        /// Gets or sets the namenode for the HDFS.
        /// </summary>
        public string HdfsNameNode
        {
            get { return _hdfsNameNode; }
            set
            {
                ThrowIfReadOnly();
                _hdfsNameNode = value;
            }
        }

        /// <summary>
        /// Gets or sets the HTTP port used by the namenode for the HDFS.
        /// </summary>
        /// <remarks>
        /// The default is 8033.
        /// </remarks>
        public int HdfsNameNodeHttpPort
        {
            get { return _hdfsNameNodeHttpPort; }
            set
            {
                ThrowIfReadOnly();
                _hdfsNameNodeHttpPort = value;
            }
        }

        // NOTE(review): no generic type arguments are visible on this return type and
        // System.Collections is not imported in this file - confirm the intended declaration
        // (possibly IDictionary<string, string>).
        /// <summary>
        /// Gets the collection of environment variables associated with the HPC Query job.
        /// </summary>
        public IDictionary JobEnvironmentVariables
        {
            get { return _jobEnvironmentVariables; }
        }

        /// <summary>
        /// Gets or sets the descriptive name used to describe the HPC Query job.
        /// </summary>
        /// <remarks>
        /// The default is null (no name). May be overridden by cluster settings such as node templates.
        /// This property can be altered even when <see cref="IsReadOnly"/> is true.
        /// </remarks>
        public string JobFriendlyName
        {
            get { return _jobFriendlyName; }
            // Deliberately not guarded by ThrowIfReadOnly().
            set { _jobFriendlyName = value; }
        }

        /// <summary>
        /// Gets or sets the minimum number of cluster nodes that the HPC Server job will use.
        /// </summary>
        /// <remarks>
        /// The default is null (no lower limit). May be overridden by cluster settings such as node templates.
        /// </remarks>
        public int? JobMinNodes
        {
            get { return _jobMinNodes; }
            set
            {
                ThrowIfReadOnly();
                _jobMinNodes = value;
            }
        }

        /// <summary>
        /// Gets or sets the maximum number of cluster nodes that the HPC Server job will use.
        /// </summary>
        /// <remarks>
        /// The default is null (no upper limit). May be overridden by cluster settings such as node templates.
        /// </remarks>
        public int? JobMaxNodes
        {
            get { return _jobMaxNodes; }
            set
            {
                ThrowIfReadOnly();
                _jobMaxNodes = value;
            }
        }

        /// <summary>
        /// Gets or sets the name of the compute node group that the HPC Server job will use.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Creation and management of nodes groups is performed using the HPC Cluster Manager.
        /// </para>
        /// <para>
        /// The default is null (no node group restriction). May be overridden by cluster settings such as node templates.
        /// </para>
        /// </remarks>
        public string NodeGroup
        {
            get { return _nodeGroup; }
            set
            {
                ThrowIfReadOnly();
                _nodeGroup = value;
            }
        }

        /// <summary>
        /// Gets or sets the maximum execution time for the HPC Query job, in seconds.
        /// </summary>
        /// <remarks>
        /// The default is null (no runtime limit). May be overridden by cluster settings such as node templates.
        /// </remarks>
        public int? JobRuntimeLimit
        {
            get { return _jobRuntimeLimit; }
            set
            {
                ThrowIfReadOnly();
                _jobRuntimeLimit = value;
            }
        }

        /// <summary>
        /// Enables or disables speculative duplication of vertices based on runtime performance analysis.
        /// </summary>
        /// <remarks>
        /// The default is false.
        /// </remarks>
        public bool EnableSpeculativeDuplication
        {
            get { return _enableSpeculativeDuplication; }
            set
            {
                ThrowIfReadOnly();
                _enableSpeculativeDuplication = value;
            }
        }

        /// <summary>
        /// Gets or sets the value specifying whether to use Local debugging mode.
        /// </summary>
        /// <remarks>
        /// <para>
        /// If true, the HPC Query will execute in the current AppDomain via LINQ-to-Objects.
        /// This mode is particularly useful for debugging user-functions before attempting cluster execution.
        /// LocalDebug mode accesses DSC as usual for input and output data.
        /// </para>
        /// <para>
        /// LocalDebug mode does not perform vertex-code compilation, nor is a job submitted to HPC Server.
        /// </para>
        /// <para>The default is false.</para>
        /// </remarks>
        public bool LocalDebug
        {
            get { return _localDebug; }
            set
            {
                ThrowIfReadOnly();
                _localDebug = value;
            }
        }

        // NOTE(review): no generic type argument is visible on this return type - confirm the
        // intended declaration (possibly IList<string>).
        /// <summary>
        /// Get the list of resources to add to the HPC job used to execute a HPC Query.
        /// </summary>
        /// <remarks>
        /// <para>
        /// During query submission, some resources will be detected and added automatically. It is only necessary
        /// to add resources that are not detected automatically.
        /// </para>
        /// <para>
        /// Each resource should be a complete path to a file-based resource accessible from the local machine.
        /// </para>
        /// </remarks>
        public IList ResourcesToAdd
        {
            get { return _resourcesToAdd; }
        }

        // NOTE(review): no generic type argument is visible on this return type - confirm the
        // intended declaration (possibly IList<string>).
        /// <summary>
        /// Get the list of resources to remove from the HPC job used to execute a HPC Query.
        /// </summary>
        /// <remarks>
        /// <para>
        /// During query submission, some resources will be detected and added automatically.
        /// Remove resources that are detected automatically but that are not required for job execution.
        /// </para>
        /// <para>
        /// Each resource should be a complete path to a file-based resource accessible from the local machine.
        /// </para>
        /// </remarks>
        public IList ResourcesToRemove
        {
            get { return _resourcesToRemove; }
        }

        /// <summary>
        /// Gets or sets the RunAs username for jobs submitted to HPC Server.
        /// </summary>
        /// <remarks>
        /// The default is null (use the credentials of the current Thread)
        /// </remarks>
        public string JobUsername
        {
            get { return _jobUsername; }
            set
            {
                ThrowIfReadOnly();
                _jobUsername = value;
            }
        }

        /// <summary>
        /// Gets or sets the RunAs password for jobs submitted to HPC Server.
        /// </summary>
        /// <remarks>
        /// The default is null (use the credentials of the current Thread)
        /// </remarks>
        public string JobPassword
        {
            get { return _jobPassword; }
            set
            {
                ThrowIfReadOnly();
                _jobPassword = value;
            }
        }

        /// <summary>
        /// Gets or sets the trace level to use for HPC Query jobs.
        /// </summary>
        /// <remarks>
        /// <para>
        /// The RuntimeTraceLevel affects the logs produced by all components associated with the execution
        /// of a HPC Query job.
        /// </para>
        /// <para>The default is HpcQueryTraceLevel.Error</para>
        /// </remarks>
        public HpcQueryTraceLevel RuntimeTraceLevel
        {
            get { return _runtimeTraceLevel; }
            set
            {
                ThrowIfReadOnly();
                _runtimeTraceLevel = value;
            }
        }

#if YARN_MISSING_FEATURE
        /// <summary>
        /// Gets or sets the node that should be used for running the HPC Query Graph Manager task.
        /// </summary>
        /// <remarks>
        /// If null, the Graph Manager task will run on an arbitrary machine that is allocated to the HPC Query job.
        /// </remarks>
        public string GraphManagerNode
        {
            get { return _graphManagerNode; }
            set
            {
                ThrowIfReadOnly();
                _graphManagerNode = value;
            }
        }
#endif

        /// <summary>
        /// Gets or sets whether certain operators will preserve item ordering.
        /// When true, the Select, SelectMany and Where operators will preserve item ordering;
        /// otherwise, they may shuffle the input items as they are processed.
        /// </summary>
        /// <remarks>
        /// The default is false.
        /// </remarks>
        public bool SelectiveOrderPreservation
        {
            get { return _selectAndWherePreserveOrder; }
            set
            {
                ThrowIfReadOnly();
                _selectAndWherePreserveOrder = value;
            }
        }

        /// <summary>
        /// Configures query jobs to be launched on the cluster nodes against a .NET framework version
        /// matching that of the client process. This should only be set if all cluster nodes are known to have
        /// the same .NET version as the client.
        /// When set to false (default), the vertex code will be compiled and run against .NET Framework 3.5.
        /// </summary>
        public bool MatchClientNetFrameworkVersion
        {
            get { return _matchClientNetFrameworkVersion; }
            set
            {
                ThrowIfReadOnly();
                _matchClientNetFrameworkVersion = value;
            }
        }

        /// <summary>
        /// Gets or sets whether user-defined methods and custom serializers may be called on multiple threads
        /// of a single process.
        /// </summary>
        /// <remarks>
        /// This option affects the internal behavior of individual queries and applies to both the client process
        /// (for serialization and local-debug mode) and to vertex processes.
        /// This option does not have any serializing effect for queries that are submitted concurrently by one
        /// or more client processes.
        /// If true, user-defined methods may be called concurrently.
        /// If false, user-defined methods will be called without concurrency.
        /// The default is true.
        /// </remarks>
        public bool AllowConcurrentUserDelegatesInSingleProcess
        {
            get { return _multiThreading; }
            set
            {
                ThrowIfReadOnly();
                _multiThreading = value;
            }
        }

        /// <summary>
        /// Initializes a new instance of the HpcLinqConfiguration class.
        /// </summary>
        /// <remarks>
        /// The head node and HDFS namenode are left null; the Yarn and Dryad home directories
        /// are taken from the YARN_HOME and DRYAD_HOME environment variables.
        /// </remarks>
        public HpcLinqConfiguration()
        {
        }

        /// <summary>
        /// Initializes a new instance of the HpcLinqConfiguration class.
        /// </summary>
        /// <param name="headNode">
        /// The head node for the HPC Server used to execute the HPC Query job.
        /// The same value is used as the default HDFS namenode.
        /// </param>
        public HpcLinqConfiguration(string headNode)
            : this(headNode, headNode)
        {
        }

        /// <summary>
        /// Initializes a new instance of the HpcLinqConfiguration class.
        /// </summary>
        /// <param name="headNode">The head node for the HPC Server used to execute the HPC Query job.</param>
        /// <param name="hdfsNameNode">The namenode for the HDFS.</param>
        public HpcLinqConfiguration(string headNode, string hdfsNameNode)
        {
            _headNode = headNode;
            _hdfsNameNode = hdfsNameNode;
        }

        /// <summary>
        /// Creates a read-only copy of this configuration.
        /// </summary>
        /// <remarks>
        /// Scalar settings are copied field by field. The environment-variable dictionary and both
        /// resource lists are replaced with the results of their <c>GetImmutableClone()</c> calls,
        /// so the copy does not share those collection instances' mutability with this object
        /// (presumably the clones reject modification - confirm against the collection types).
        /// </remarks>
        /// <returns>A new configuration whose <see cref="IsReadOnly"/> is true.</returns>
        internal HpcLinqConfiguration MakeImmutableCopy()
        {
            HpcLinqConfiguration newConfig = new HpcLinqConfiguration();

            newConfig._isReadOnly = true;

            // Collections: cloned rather than shared.
            newConfig._jobEnvironmentVariables = this._jobEnvironmentVariables.GetImmutableClone();
            newConfig._resourcesToAdd = this._resourcesToAdd.GetImmutableClone();
            newConfig._resourcesToRemove = this._resourcesToRemove.GetImmutableClone();

            // Scalar settings: copied straight from the backing fields.
            newConfig._intermediateDataCompressionScheme = this._intermediateDataCompressionScheme;
            newConfig._outputCompressionScheme = this._outputCompressionScheme;
            newConfig._compileForVertexDebugging = this._compileForVertexDebugging;
            newConfig._headNode = this._headNode;
            newConfig._hdfsNameNode = this._hdfsNameNode;
            newConfig._hdfsNameNodeHttpPort = this._hdfsNameNodeHttpPort;
            newConfig._jobFriendlyName = this._jobFriendlyName;
            newConfig._jobMinNodes = this._jobMinNodes;
            newConfig._jobMaxNodes = this._jobMaxNodes;
            newConfig._nodeGroup = this._nodeGroup;
            newConfig._jobRuntimeLimit = this._jobRuntimeLimit;
            newConfig._localDebug = this._localDebug;
            newConfig._orderPreserving = this._orderPreserving;
            newConfig._jobUsername = this._jobUsername;
            newConfig._jobPassword = this._jobPassword;
            newConfig._runtimeTraceLevel = this._runtimeTraceLevel;
            newConfig._graphManagerNode = this._graphManagerNode;
            newConfig._selectAndWherePreserveOrder = this._selectAndWherePreserveOrder;
            newConfig._matchClientNetFrameworkVersion = this._matchClientNetFrameworkVersion;
            newConfig._enableSpeculativeDuplication = this._enableSpeculativeDuplication;
            newConfig._multiThreading = this._multiThreading;

            // Overwrites the values the new instance read from the environment, so the copy
            // reflects any explicit DryadHomeDirectory / YarnHomeDirectory assignments.
            newConfig._dryadHome = this._dryadHome;
            newConfig._yarnHome = this._yarnHome;

            return newConfig;
        }
    }
}