/*
Copyright (c) Microsoft Corporation

All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0

THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.

See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.
*/

using System;
using System.IO;
using System.Text;
using Microsoft.Research.Tools;

namespace Microsoft.Research.JobObjectModel
{
    /// <summary>
    /// Error during conversation with cluster.
    /// </summary>
    public sealed class ClusterException : Exception
    {
        /// <summary>
        /// Create an exception about handling a cluster.
        /// </summary>
        /// <param name="message">Exception message.</param>
        public ClusterException(string message)
            : base(message)
        {
        }
    }

    /// <summary>
    /// Identifier for a Dryad process; for now we are using globally-unique process GUID.
    /// A Dryad Job identifier is always the same as the job manager process guid.
    /// </summary>
    [Serializable]
    public class DryadProcessIdentifier : IEquatable<DryadProcessIdentifier>
    {
        /// <summary>
        /// Process identifier; null when the identifier is not known.
        /// </summary>
        private string processIdentifier;

        /// <summary>
        /// Used just for XML serialization. Leaves the identifier unset (unknown).
        /// </summary>
        public DryadProcessIdentifier()
        {
        }

        /// <summary>
        /// Create an identifier for the given process id.
        /// </summary>
        /// <param name="pid">The id of the process (platform-dependent).</param>
        public DryadProcessIdentifier(string pid)
        {
            this.processIdentifier = pid;
        }

        /// <summary>
        /// Process identifier; the setter is public for serialization only.
        /// </summary>
        public string ProcessIdentifier
        {
            get { return this.processIdentifier; }
            set { this.processIdentifier = value; }
        }

        /// <summary>
        /// If true the process identifier is not known (it is null).
        /// </summary>
        public bool IsUnknown
        {
            get { return this.ProcessIdentifier == null; }
        }

        /// <summary>
        /// Human-readable description of the process identifier.
        /// </summary>
        /// <returns>The process id, or an empty string if the pid is not set.</returns>
        public override string ToString()
        {
            return this.ProcessIdentifier != null ? this.ProcessIdentifier : "";
        }

        /// <summary>
        /// Equality test.
        /// </summary>
        /// <param name="obj">Object to compare to.</param>
        /// <returns>True if both objects represent the same process id.</returns>
        public override bool Equals(object obj)
        {
            // 'as' yields null for null or foreign types; the typed overload rejects null.
            return this.Equals(obj as DryadProcessIdentifier);
        }

        #region IEquatable<DryadProcessIdentifier> Members

        /// <summary>
        /// Equality test. Two unknown identifiers are considered equal to each other.
        /// </summary>
        /// <param name="other">Process id to compare to; may be null.</param>
        /// <returns>True if the id's represent the same process; false if <paramref name="other"/> is null.</returns>
        public bool Equals(DryadProcessIdentifier other)
        {
            if (object.ReferenceEquals(other, null))
            {
                return false;
            }

            // Ordinal comparison: null == null is true, null vs. non-null is false.
            return string.Equals(this.ProcessIdentifier, other.ProcessIdentifier, StringComparison.Ordinal);
        }

        /// <summary>
        /// Hash code consistent with <see cref="Equals(DryadProcessIdentifier)"/>:
        /// identifiers that compare equal produce the same hash code.
        /// </summary>
        /// <returns>The hashcode of the process id; 0 when the id is unknown.</returns>
        public override int GetHashCode()
        {
            // Must be derived from the same data Equals compares; the object-identity
            // hash would make equal identifiers land in different hash buckets.
            return this.ProcessIdentifier == null ? 0 : this.ProcessIdentifier.GetHashCode();
        }

        #endregion
    }

    /// <summary>
    /// Brief summary of an executed DryadLINQ job.
    /// </summary>
    [Serializable]
    public sealed class DryadLinqJobSummary : IEquatable<DryadLinqJobSummary>
    {
        /// <summary>
        /// Empty constructor for XML serialization.
        /// </summary>
        public DryadLinqJobSummary()
        {
        }

        /// <summary>
        /// Initialize a job summary.
        /// </summary>
        /// <param name="cluster">Cluster where the job ran.</param>
        /// <param name="clusterType">The type of ClusterConfiguration.</param>
        /// <param name="virtualcluster">Virtual cluster where job ran.</param>
        /// <param name="machine">Machine where job manager ran.</param>
        /// <param name="jobId">Id of job.</param>
        /// <param name="clusterJobId">Id of job on the cluster.</param>
        /// <param name="jmProcessGuid">Guid of job manager process.</param>
        /// <param name="friendlyname">Friendly name used.</param>
        /// <param name="username">Who ran the job.</param>
        /// <param name="date">Start date (not completion date).</param>
        /// <param name="endTime">Estimated end running time.</param>
        /// <param name="status">Job status.</param>
        public DryadLinqJobSummary(
            string cluster,
            ClusterConfiguration.ClusterType clusterType,
            string virtualcluster,
            string machine,
            string jobId,
            string clusterJobId,
            DryadProcessIdentifier jmProcessGuid,
            string friendlyname,
            string username,
            DateTime date,
            DateTime endTime,
            ClusterJobInformation.ClusterJobStatus status)
        {
            this.VirtualCluster = virtualcluster;
            this.Cluster = cluster;
            this.ClusterType = clusterType;
            this.Machine = machine;
            this.Name = friendlyname;
            this.User = username;
            this.Date = date;
            this.EndTime = endTime;
            this.Status = status;
            this.ManagerProcessGuid = jmProcessGuid;
            this.JobID = jobId;
            this.ClusterJobId = clusterJobId;
        }

        /// <summary>
        /// Cluster where the job ran.
        /// </summary>
        public string Cluster { get; /*private*/ set; }

        /// <summary>
        /// Id of cluster job that originated this DryadLinq job (can be used to find the cluster job from the dryadlinq job).
        /// </summary>
        public string ClusterJobId { get; /*private*/ set; }

        /// <summary>
        /// Estimated end running time of the job.
        /// </summary>
        public DateTime EndTime { get; /*private*/ set; }

        /// <summary>
        /// Type of the cluster where the job ran.
        /// </summary>
        public ClusterConfiguration.ClusterType ClusterType { get; /*private*/ set; }

        /// <summary>
        /// Virtual cluster where job ran.
        /// </summary>
        public string VirtualCluster { get; /*private*/ set; }

        /// <summary>
        /// (Friendly) name of the job.
        /// </summary>
        public string Name { get; /*private*/ set; }

        /// <summary>
        /// User who submitted job.
        /// </summary>
        public string User { get; /*private*/ set; }

        /// <summary>
        /// ID of job on the cluster.
        /// </summary>
        public string JobID { get; /*private*/ set; }

        /// <summary>
        /// The Guid of the job manager process.
        /// </summary>
        public DryadProcessIdentifier ManagerProcessGuid { set; get; }

        /// <summary>
        /// Alias of the user who submitted the job: the part of <see cref="User"/>
        /// that follows the first backslash.
        /// </summary>
        /// <returns>
        /// The text after the first backslash in <see cref="User"/>; the whole user name
        /// when it contains no backslash; an empty string when the user is null or empty.
        /// </returns>
        public string GetAlias()
        {
            if (string.IsNullOrEmpty(this.User))
            {
                return string.Empty;
            }

            // The char overload is an ordinal search; IndexOf(string) is culture-sensitive.
            // When no backslash is found pos is -1 and Substring(0) yields the whole name.
            int pos = this.User.IndexOf('\\');
            return this.User.Substring(pos + 1);
        }

        /// <summary>
        /// Date when job was submitted.
        /// </summary>
        public DateTime Date { get; /*private*/ set; }

        /// <summary>
        /// Status of the job.
        /// </summary>
        public ClusterJobInformation.ClusterJobStatus Status { get; /*internal*/ set; }

        /// <summary>
        /// Machine where the job manager ran.
        /// </summary>
        public string Machine { get; /*private*/ set; }

        /// <summary>
        /// Get a short name for this job summary.
        /// </summary>
        /// <returns>Short name of job summary: the start date in sortable ("s") format, a dash, and the job name.</returns>
        public string ShortName()
        {
            // we use the starting time to uniquify the job name
            return this.Date.ToString("s") + "-" + this.Name;
        }

        /// <summary>
        /// True if the given object is a job summary equal to this one.
        /// </summary>
        /// <param name="obj">Object to compare against.</param>
        /// <returns>True if <paramref name="obj"/> is a summary that is equal to this one.</returns>
        public override bool Equals(object obj)
        {
            return this.Equals(obj as DryadLinqJobSummary);
        }

        /// <summary>
        /// True if these two summaries are the same. The status and end time do not matter, since the job may still be running.
        /// </summary>
        /// <param name="other">Summary to compare against; may be null.</param>
        /// <returns>True if they are equal; false if <paramref name="other"/> is null.</returns>
        public bool Equals(DryadLinqJobSummary other)
        {
            if (object.ReferenceEquals(other, null))
            {
                return false;
            }

            return this.Cluster == other.Cluster
                && this.ClusterJobId == other.ClusterJobId
                && this.Date == other.Date
                && this.Machine == other.Machine
                && this.Name == other.Name
                && this.User == other.User;
        }

        /// <summary>
        /// Hashcode proper for the equality test: it combines exactly the properties
        /// compared by <see cref="Equals(DryadLinqJobSummary)"/>.
        /// </summary>
        /// <returns>The object hashcode.</returns>
        public override int GetHashCode()
        {
            // Overflow is expected and harmless when mixing hash codes.
            unchecked
            {
                int hash = 17;
                hash = (hash * 31) + HashOrZero(this.Cluster);
                hash = (hash * 31) + HashOrZero(this.ClusterJobId);
                hash = (hash * 31) + this.Date.GetHashCode();
                hash = (hash * 31) + HashOrZero(this.Machine);
                hash = (hash * 31) + HashOrZero(this.Name);
                hash = (hash * 31) + HashOrZero(this.User);
                return hash;
            }
        }

        /// <summary>
        /// Hash code of a string that may be null (properties are null on a
        /// summary created through the parameterless constructor).
        /// </summary>
        /// <param name="value">String to hash; may be null.</param>
        /// <returns>0 for null, otherwise the string's hash code.</returns>
        private static int HashOrZero(string value)
        {
            return value == null ? 0 : value.GetHashCode();
        }

        /// <summary>
        /// A string describing the unique identifying part of the summary.
        /// Two different summaries may represent the same job at different times.
        /// </summary>
        /// <returns>The part common to all jobs.</returns>
        public string AsIdentifyingString()
        {
            return string.Format(
                "Cluster={0} ClusterJobID={1} Date={2} Machine={3} Name={4} User={5}",
                this.Cluster,
                this.ClusterJobId,
                this.Date,
                this.Machine,
                this.Name,
                this.User);
        }
    }

    /// <summary>
    /// This class is an abstraction of a cluster-level job, as opposed to a DryadLINQ job.
    /// In Cosmos that's called a task, in HPC that's called a Job.
    /// (In cosmos a task is a recurring job. In DryadLINQ running on top of cosmos, a task always contains exactly one job.)
    /// </summary>
    /// <remarks>
    /// NOTE(review): this type implements IEquatable but does not override
    /// Equals(object) or GetHashCode(). Instances are mutated in place (see Copy),
    /// so a value-based hash was deliberately not added here; confirm how callers
    /// store these objects before changing that.
    /// </remarks>
    public class ClusterJobInformation : IEquatable<ClusterJobInformation>
    {
        /// <summary>
        /// Status of a cluster job.
        /// </summary>
        public enum ClusterJobStatus
        {
            /// <summary>
            /// Job is still running.
            /// </summary>
            Running,

            /// <summary>
            /// Job has finished successfully.
            /// </summary>
            Succeeded,

            /// <summary>
            /// Job has finished and has failed.
            /// </summary>
            Failed,

            /// <summary>
            /// Job has been cancelled. Not precise on cosmos clusters.
            /// </summary>
            Cancelled,

            /// <summary>
            /// Could not determine job status.
            /// </summary>
            Unknown,
        }

        /// <summary>
        /// True if job is finished, false if not, or unknown.
        /// </summary>
        /// <param name="status">Job status.</param>
        /// <returns>True if the job is no longer running.</returns>
        /// <exception cref="InvalidDataException">The status is not a defined enumeration value.</exception>
        public static bool JobIsFinished(ClusterJobStatus status)
        {
            switch (status)
            {
                case ClusterJobStatus.Failed:
                case ClusterJobStatus.Succeeded:
                case ClusterJobStatus.Cancelled:
                    return true;

                case ClusterJobStatus.Running:
                case ClusterJobStatus.Unknown:
                    return false;

                default:
                    throw new InvalidDataException("Invalid job status " + status);
            }
        }

        /// <summary>
        /// Create a cluster job structure from a bunch of information.
        /// </summary>
        /// <param name="cluster">Cluster where the job is running.</param>
        /// <param name="virtualCluster">Virtual cluster where the job has run.</param>
        /// <param name="clusterJobGuid">Cluster job guid.</param>
        /// <param name="jobName">Name of the cluster job.</param>
        /// <param name="username">User who submitted cluster job.</param>
        /// <param name="date">Last execution of cluster job.</param>
        /// <param name="runningTime">Time the job ran.</param>
        /// <param name="status">Execution status.</param>
        public ClusterJobInformation(
            string cluster,
            string virtualCluster,
            string clusterJobGuid,
            string jobName,
            string username,
            DateTime date,
            TimeSpan runningTime,
            ClusterJobStatus status)
        {
            this.VirtualCluster = virtualCluster;
            this.Cluster = cluster;
            this.ClusterJobID = clusterJobGuid;
            this.Name = jobName;
            this.User = username;
            this.Date = date;
            this.EstimatedRunningTime = runningTime;
            this.Status = status;
            this.JobSummary = null;
        }

        /// <summary>
        /// Name of cluster job.
        /// </summary>
        public string Name { get; set; }

        /// <summary>
        /// User who submitted cluster job.
        /// </summary>
        public string User { get; set; }

        /// <summary>
        /// Date when job was submitted.
        /// </summary>
        public DateTime Date { get; set; }

        /// <summary>
        /// ID of Job on cluster.
        /// </summary>
        public string ClusterJobID { get; set; }

        /// <summary>
        /// Status of the execution.
        /// </summary>
        public ClusterJobStatus Status { get; set; }

        /// <summary>
        /// Cluster where the job ran.
        /// </summary>
        public string Cluster { get; set; }

        /// <summary>
        /// In some installations a cluster is composed of multiple virtual clusters.
        /// </summary>
        public string VirtualCluster { get; set; }

        /// <summary>
        /// Is the cluster job information still available on the cluster?
        /// </summary>
        public bool IsUnavailable { get; set; }

        /// <summary>
        /// Cache here the associated job, if available. Null if not cached.
        /// </summary>
        private DryadLinqJobSummary JobSummary { get; set; }

        /// <summary>
        /// Estimated time the job ran.
        /// </summary>
        public TimeSpan EstimatedRunningTime { get; set; }

        /// <summary>
        /// If known, set the associated job summary.
        /// </summary>
        /// <param name="summary">Job summary for this cluster job.</param>
        public void SetAssociatedSummary(DryadLinqJobSummary summary)
        {
            this.JobSummary = summary;
        }

        /// <summary>
        /// Discover the dryadlinq job associated with a cluster job.
        /// The result is cached; a failed discovery marks this job as unavailable
        /// so that later calls return null without querying again.
        /// </summary>
        /// <param name="status">Cluster status object used to perform the discovery.</param>
        /// <param name="reporter">Delegate used to report errors.</param>
        /// <returns>The job, if any; null if unavailable.</returns>
        public DryadLinqJobSummary DiscoverDryadLinqJob(ClusterStatus status, StatusReporter reporter)
        {
            if (this.IsUnavailable)
            {
                return null;
            }

            if (this.JobSummary != null)
            {
                return this.JobSummary;
            }

            DryadLinqJobSummary discovered = status.DiscoverDryadLinqJobFromClusterJob(this, reporter);
            if (discovered == null)
            {
                this.IsUnavailable = true;
            }

            this.JobSummary = discovered;
            return discovered;
        }

        /// <summary>
        /// Copy the content of a cluster job. The <see cref="Cluster"/> property is not copied.
        /// </summary>
        /// <param name="refresh">The value to copy from.</param>
        internal void Copy(ClusterJobInformation refresh)
        {
            this.Name = refresh.Name;
            this.Status = refresh.Status;
            this.User = refresh.User;
            this.JobSummary = refresh.JobSummary;
            this.ClusterJobID = refresh.ClusterJobID;
            this.Date = refresh.Date;
            this.IsUnavailable = refresh.IsUnavailable;
            this.VirtualCluster = refresh.VirtualCluster;
            this.EstimatedRunningTime = refresh.EstimatedRunningTime;
        }

        /// <summary>
        /// True if these two records represent the same job: same cluster, virtual
        /// cluster, name, user, cluster job id and date.
        /// </summary>
        /// <param name="other">Record to compare against; may be null.</param>
        /// <returns>True if the records are equal; false if <paramref name="other"/> is null.</returns>
        public bool Equals(ClusterJobInformation other)
        {
            if (object.ReferenceEquals(other, null))
            {
                return false;
            }

            return this.Cluster == other.Cluster
                && this.VirtualCluster == other.VirtualCluster
                && this.Name == other.Name
                && this.User == other.User
                && this.ClusterJobID == other.ClusterJobID
                && this.Date == other.Date;
        }
    }
}