// Dryad/LinqToDryad/HpcLinqConfiguration.cs
//
// 568 lines
// 20 KiB
// C#

/*
Copyright (c) Microsoft Corporation
All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0
THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.
See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Linq;
using System.Text;
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("DistributedDandelion")]
namespace Microsoft.Research.DryadLinq
{
/// <summary>
/// Configuration information for a HPC Query.
/// </summary>
/// <remarks>
/// An instance is mutable until it is marked read-only (see <see cref="IsReadOnly"/>).
/// <see cref="MakeImmutableCopy"/> produces a read-only snapshot of the current settings.
/// </remarks>
public sealed class HpcLinqConfiguration
{
    // Set to true on the snapshot produced by MakeImmutableCopy(); checked by ThrowIfReadOnly().
    internal bool _isReadOnly;

    private HpcLinqStringList _resourcesToAdd = new HpcLinqStringList();
    private HpcLinqStringList _resourcesToRemove = new HpcLinqStringList();
    private DscCompressionScheme _intermediateDataCompressionScheme = DscCompressionScheme.Gzip;
    private DscCompressionScheme _outputCompressionScheme = DscCompressionScheme.None;
    private bool _compileForVertexDebugging = false; // Ship PDBs + No optimization
    private string _headNode;
    private string _hdfsNameNode;
    private int _hdfsNameNodeHttpPort = 8033; //TODO - Read Config
    private string _jobFriendlyName;
    private int? _jobMinNodes;
    private int? _jobMaxNodes;
    private string _nodeGroup;
    private int? _jobRuntimeLimit;
    private bool _localDebug = false;
    // No public accessor in this class; the value is carried over by MakeImmutableCopy().
    private bool _orderPreserving = true;
    private string _jobUsername = null;
    private string _jobPassword = null;
    private HpcQueryTraceLevel _runtimeTraceLevel = HpcQueryTraceLevel.Error;
    // Only exposed through GraphManagerNode, which is compiled in under YARN_MISSING_FEATURE.
    private string _graphManagerNode;
    private bool _enableSpeculativeDuplication = false;
    private HpcLinqStringDictionary _jobEnvironmentVariables = new HpcLinqStringDictionary();
    private bool _selectAndWherePreserveOrder = false;
    private bool _matchClientNetFrameworkVersion = false;
    private bool _multiThreading = true;

    // Defaults come from the YARN_HOME and DRYAD_HOME environment variables. Field initializers
    // run for every constructor, so no constructor needs to repeat these lookups.
    private string _yarnHome = Environment.GetEnvironmentVariable("YARN_HOME");
    private string _dryadHome = Environment.GetEnvironmentVariable("DRYAD_HOME");

    /// <summary>
    /// Throws a <see cref="System.NotSupportedException"/> if this configuration is read-only.
    /// </summary>
    /// <exception cref="System.NotSupportedException">The configuration is read-only.</exception>
    private void ThrowIfReadOnly()
    {
        if (_isReadOnly)
        {
            throw new NotSupportedException(SR.ConfigReadonly);
        }
    }

    /// <summary>
    /// Gets the value indicating whether the HpcLinqConfiguration is read-only.
    /// </summary>
    /// <remarks>
    /// When <see cref="IsReadOnly"/> is true, every property except JobFriendlyName will throw a <see cref="System.NotSupportedException"/>
    /// from its setter.
    /// </remarks>
    public bool IsReadOnly
    {
        get { return _isReadOnly; }
    }

    /// <summary>
    /// Gets or sets the compression scheme applied to data passed between stages in a HPC Query.
    /// </summary>
    /// <remarks>
    /// The default is <see cref="DscCompressionScheme.Gzip"/>.
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public DscCompressionScheme IntermediateDataCompressionScheme
    {
        get { return _intermediateDataCompressionScheme; }
        set
        {
            ThrowIfReadOnly();
            _intermediateDataCompressionScheme = value;
        }
    }

    /// <summary>
    /// Gets or sets the value specifying the compression scheme for output data.
    /// </summary>
    /// <remarks>
    /// The default is <see cref="DscCompressionScheme.None"/>.
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public DscCompressionScheme OutputDataCompressionScheme
    {
        get { return _outputCompressionScheme; }
        set
        {
            ThrowIfReadOnly();
            _outputCompressionScheme = value;
        }
    }

    /// <summary>
    /// Gets or sets the value specifying whether to compile code that supports debugging vertex tasks that execute on a HPC Server cluster.
    /// </summary>
    /// <remarks>
    /// If true, vertex code will be compiled with no code-level optimizations and a PDB will be generated.
    /// Also, the query execution job will look for and include the PDB associated with every DLL resource
    /// that is part of the submitted job.
    /// <para>The default is false.</para>
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public bool CompileForVertexDebugging
    {
        get { return _compileForVertexDebugging; }
        set
        {
            ThrowIfReadOnly();
            _compileForVertexDebugging = value;
        }
    }

    /// <summary>
    /// Gets or sets the bin directory for Dryad.
    /// </summary>
    /// <remarks>
    /// The default is the value of the DRYAD_HOME environment variable at construction time
    /// (null if the variable is not set).
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public string DryadHomeDirectory
    {
        get { return _dryadHome; }
        set
        {
            ThrowIfReadOnly();
            _dryadHome = value;
        }
    }

    /// <summary>
    /// Gets or sets the home directory for Yarn.
    /// </summary>
    /// <remarks>
    /// The default is the value of the YARN_HOME environment variable at construction time
    /// (null if the variable is not set).
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public string YarnHomeDirectory
    {
        get { return _yarnHome; }
        set
        {
            ThrowIfReadOnly();
            _yarnHome = value;
        }
    }

    /// <summary>
    /// Gets or sets the head node for the HPC Server used to execute the HPC Query job.
    /// </summary>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public string HeadNode
    {
        get { return _headNode; }
        set
        {
            ThrowIfReadOnly();
            _headNode = value;
        }
    }

    /// <summary>
    /// Gets or sets the namenode for the HDFS.
    /// </summary>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public string HdfsNameNode
    {
        get { return _hdfsNameNode; }
        set
        {
            ThrowIfReadOnly();
            _hdfsNameNode = value;
        }
    }

    /// <summary>
    /// Gets or sets the HTTP port used by the namenode for the HDFS.
    /// </summary>
    /// <remarks>
    /// The default is 8033.
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public int HdfsNameNodeHttpPort
    {
        get { return _hdfsNameNodeHttpPort; }
        set
        {
            ThrowIfReadOnly();
            _hdfsNameNodeHttpPort = value;
        }
    }

    /// <summary>
    /// Gets the collection of environment variables associated with the HPC Query job.
    /// </summary>
    public IDictionary<string, string> JobEnvironmentVariables
    {
        get { return _jobEnvironmentVariables; }
    }

    /// <summary>
    /// Gets or sets the descriptive name used to describe the HPC Query job.
    /// </summary>
    /// <remarks>
    /// <para>The default is null (no name). May be overridden by cluster settings such as node templates.</para>
    /// <para>This property can be altered even when <see cref="IsReadOnly"/> is true.</para>
    /// </remarks>
    public string JobFriendlyName
    {
        get { return _jobFriendlyName; }
        set
        {
            // Deliberately no ThrowIfReadOnly(): the name stays editable on read-only configurations.
            _jobFriendlyName = value;
        }
    }

    /// <summary>
    /// Gets or sets the minimum number of cluster nodes that the HPC Server job will use.
    /// </summary>
    /// <remarks>
    /// <para>The default is null (no lower limit). May be overridden by cluster settings such as node templates.</para>
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public int? JobMinNodes
    {
        get { return _jobMinNodes; }
        set
        {
            ThrowIfReadOnly();
            _jobMinNodes = value;
        }
    }

    /// <summary>
    /// Gets or sets the maximum number of cluster nodes that the HPC Server job will use.
    /// </summary>
    /// <remarks>
    /// <para>The default is null (no upper limit). May be overridden by cluster settings such as node templates.</para>
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public int? JobMaxNodes
    {
        get { return _jobMaxNodes; }
        set
        {
            ThrowIfReadOnly();
            _jobMaxNodes = value;
        }
    }

    /// <summary>
    /// Gets or sets the name of the compute node group that the HPC Server job will use.
    /// </summary>
    /// <remarks>
    /// <para>Creation and management of node groups is performed using the HPC Cluster Manager.</para>
    /// <para>The default is null (no node group restriction). May be overridden by cluster settings such as node templates.</para>
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public string NodeGroup
    {
        get { return _nodeGroup; }
        set
        {
            ThrowIfReadOnly();
            _nodeGroup = value;
        }
    }

    /// <summary>
    /// Gets or sets the maximum execution time for the HPC Query job, in seconds.
    /// </summary>
    /// <remarks>
    /// <para>The default is null (no runtime limit). May be overridden by cluster settings such as node templates.</para>
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public int? JobRuntimeLimit
    {
        get { return _jobRuntimeLimit; }
        set
        {
            ThrowIfReadOnly();
            _jobRuntimeLimit = value;
        }
    }

    /// <summary>
    /// Enables or disables speculative duplication of vertices based on runtime performance analysis.
    /// </summary>
    /// <remarks>
    /// <para>The default is false.</para>
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public bool EnableSpeculativeDuplication
    {
        get { return _enableSpeculativeDuplication; }
        set
        {
            ThrowIfReadOnly();
            _enableSpeculativeDuplication = value;
        }
    }

    /// <summary>
    /// Gets or sets the value specifying whether to use Local debugging mode.
    /// </summary>
    /// <remarks>
    /// <para>
    /// If true, the HPC Query will execute in the current AppDomain via LINQ-to-Objects.
    /// This mode is particularly useful for debugging user-functions before attempting cluster execution.
    /// LocalDebug mode accesses DSC as usual for input and output data.
    /// </para>
    /// <para>
    /// LocalDebug mode does not perform vertex-code compilation, nor is a job submitted to HPC Server.
    /// </para>
    /// <para>The default is false.</para>
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public bool LocalDebug
    {
        get { return _localDebug; }
        set
        {
            ThrowIfReadOnly();
            _localDebug = value;
        }
    }

    /// <summary>
    /// Gets the list of resources to add to the HPC job used to execute a HPC Query.
    /// </summary>
    /// <remarks>
    /// <para>
    /// During query submission, some resources will be detected and added automatically. It is only necessary
    /// to add resources that are not detected automatically.
    /// </para>
    /// <para>
    /// Each resource should be a complete path to a file-based resource accessible from the local machine.
    /// </para>
    /// </remarks>
    public IList<string> ResourcesToAdd
    {
        get { return _resourcesToAdd; }
    }

    /// <summary>
    /// Gets the list of resources to remove from the HPC job used to execute a HPC Query.
    /// </summary>
    /// <remarks>
    /// <para>
    /// During query submission, some resources will be detected and added automatically.
    /// Remove resources that are detected automatically but that are not required for job execution.
    /// </para>
    /// <para>
    /// Each resource should be a complete path to a file-based resource accessible from the local machine.
    /// </para>
    /// </remarks>
    public IList<string> ResourcesToRemove
    {
        get { return _resourcesToRemove; }
    }

    /// <summary>
    /// Gets or sets the RunAs user name for jobs submitted to HPC Server.
    /// </summary>
    /// <remarks>
    /// <para>The default is null (use the credentials of the current Thread)</para>
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public string JobUsername
    {
        get { return _jobUsername; }
        set
        {
            ThrowIfReadOnly();
            _jobUsername = value;
        }
    }

    /// <summary>
    /// Gets or sets the RunAs password for jobs submitted to HPC Server.
    /// </summary>
    /// <remarks>
    /// <para>The default is null (use the credentials of the current Thread)</para>
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public string JobPassword
    {
        get { return _jobPassword; }
        set
        {
            ThrowIfReadOnly();
            _jobPassword = value;
        }
    }

    /// <summary>
    /// Gets or sets the trace level to use for HPC Query jobs.
    /// </summary>
    /// <remarks>
    /// <para>The RuntimeTraceLevel affects the logs produced by all components associated with the execution
    /// of a HPC Query job.
    /// </para>
    /// <para>The default is HpcQueryTraceLevel.Error</para>
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public HpcQueryTraceLevel RuntimeTraceLevel
    {
        get { return _runtimeTraceLevel; }
        set
        {
            ThrowIfReadOnly();
            _runtimeTraceLevel = value;
        }
    }

#if YARN_MISSING_FEATURE
    /// <summary>
    /// Gets or sets the node that should be used for running the HPC Query Graph Manager task.
    /// </summary>
    /// <remarks>
    /// If null, the Graph Manager task will run on an arbitrary machine that is allocated to the HPC Query job.
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public string GraphManagerNode
    {
        get { return _graphManagerNode; }
        set
        {
            ThrowIfReadOnly();
            _graphManagerNode = value;
        }
    }
#endif

    /// <summary>
    /// Gets or sets whether certain operators will preserve item ordering.
    /// When true, the Select, SelectMany and Where operators will preserve item ordering;
    /// otherwise, they may shuffle the input items as they are processed.
    /// </summary>
    /// <remarks>
    /// The default is false.
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public bool SelectiveOrderPreservation
    {
        get { return _selectAndWherePreserveOrder; }
        set
        {
            ThrowIfReadOnly();
            _selectAndWherePreserveOrder = value;
        }
    }

    /// <summary>
    /// Configures query jobs to be launched on the cluster nodes against a .NET framework version
    /// matching that of the client process. This should only be set if all cluster nodes are known to have
    /// the same .NET version as the client.
    /// When set to false (default), the vertex code will be compiled and run against .NET Framework 3.5.
    /// </summary>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public bool MatchClientNetFrameworkVersion
    {
        get { return _matchClientNetFrameworkVersion; }
        set
        {
            ThrowIfReadOnly();
            _matchClientNetFrameworkVersion = value;
        }
    }

    /// <summary>
    /// Gets or sets whether user-defined methods and custom serializers may be called on multiple threads of a single process.
    /// </summary>
    /// <remarks>
    /// This option affects the internal behavior of individual queries and applies to both the client process (for serialization and local-debug mode)
    /// and to vertex processes.
    /// This option does not have any serializing effect for queries that are submitted concurrently by one or more client processes.
    /// If true, user-defined methods may be called concurrently.
    /// If false, user-defined methods will be called without concurrency.
    /// <para>The default is true.</para>
    /// </remarks>
    /// <exception cref="System.NotSupportedException">The setter is called while <see cref="IsReadOnly"/> is true.</exception>
    public bool AllowConcurrentUserDelegatesInSingleProcess
    {
        get { return _multiThreading; }
        set
        {
            ThrowIfReadOnly();
            _multiThreading = value;
        }
    }

    /// <summary>
    /// Initializes a new instance of the HpcLinqConfiguration class.
    /// </summary>
    /// <remarks>
    /// The head node and HDFS namenode are left null; all other settings take their defaults.
    /// </remarks>
    public HpcLinqConfiguration()
    {
    }

    /// <summary>
    /// Initializes a new instance of the HpcLinqConfiguration class.
    /// </summary>
    /// <param name="headNode">The head node for the HPC Server used to execute the HPC Query job.
    /// The same value is also used as the default HDFS namenode.</param>
    public HpcLinqConfiguration(string headNode)
        : this(headNode, headNode)
    {
    }

    /// <summary>
    /// Initializes a new instance of the HpcLinqConfiguration class.
    /// </summary>
    /// <param name="headNode">The head node for the HPC Server used to execute the HPC Query job.</param>
    /// <param name="hdfsNameNode">The namenode for the HDFS.</param>
    public HpcLinqConfiguration(string headNode, string hdfsNameNode)
    {
        _headNode = headNode;
        _hdfsNameNode = hdfsNameNode;
    }

    /// <summary>
    /// Creates a read-only copy of this configuration.
    /// </summary>
    /// <remarks>
    /// Scalar settings are copied field by field. The environment-variable dictionary and the two
    /// resource lists are copied through their GetImmutableClone() methods. The returned instance has
    /// <see cref="IsReadOnly"/> set to true; as on any read-only configuration, only
    /// <see cref="JobFriendlyName"/> can still be changed on it.
    /// </remarks>
    /// <returns>A new <see cref="HpcLinqConfiguration"/> holding the current settings.</returns>
    internal HpcLinqConfiguration MakeImmutableCopy()
    {
        HpcLinqConfiguration newConfig = new HpcLinqConfiguration();

        // Fields are written directly, so setting the read-only flag first does not block the copy.
        newConfig._isReadOnly = true;

        newConfig._jobEnvironmentVariables = this._jobEnvironmentVariables.GetImmutableClone();
        newConfig._resourcesToAdd = this._resourcesToAdd.GetImmutableClone();
        newConfig._resourcesToRemove = this._resourcesToRemove.GetImmutableClone();

        newConfig._intermediateDataCompressionScheme = this._intermediateDataCompressionScheme;
        newConfig._outputCompressionScheme = this._outputCompressionScheme;
        newConfig._compileForVertexDebugging = this._compileForVertexDebugging;
        newConfig._headNode = this._headNode;
        newConfig._hdfsNameNode = this._hdfsNameNode;
        newConfig._hdfsNameNodeHttpPort = this._hdfsNameNodeHttpPort;
        newConfig._jobFriendlyName = this._jobFriendlyName;
        newConfig._jobMinNodes = this._jobMinNodes;
        newConfig._jobMaxNodes = this._jobMaxNodes;
        newConfig._nodeGroup = this._nodeGroup;
        newConfig._jobRuntimeLimit = this._jobRuntimeLimit;
        newConfig._localDebug = this._localDebug;
        newConfig._orderPreserving = this._orderPreserving;
        newConfig._jobUsername = this._jobUsername;
        newConfig._jobPassword = this._jobPassword;
        newConfig._runtimeTraceLevel = this._runtimeTraceLevel;
        newConfig._graphManagerNode = this._graphManagerNode;
        newConfig._selectAndWherePreserveOrder = this._selectAndWherePreserveOrder;
        newConfig._matchClientNetFrameworkVersion = this._matchClientNetFrameworkVersion;
        newConfig._enableSpeculativeDuplication = this._enableSpeculativeDuplication;
        newConfig._multiThreading = this._multiThreading;

        // Copy the source's directories rather than keeping the values the new instance
        // picked up from the environment during construction.
        newConfig._dryadHome = this._dryadHome;
        newConfig._yarnHome = this._yarnHome;

        return newConfig;
    }
}
}