/*
Copyright (c) Microsoft Corporation

All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0

THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.

See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.
*/

using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading.Tasks;

using Microsoft.Research.Peloponnese.NotHttpClient;

namespace Microsoft.Research.Dryad.ClusterInterface
{
    /// <summary>
    /// this is the connection to the application's logging interface, supplied
    /// by the external application
    /// </summary>
    public interface ILogger
    {
        /// <summary>
        /// write an entry to the application's log
        /// </summary>
        /// <param name="entry">the text to log</param>
        /// <param name="file">the caller's source file; supplied by the compiler when omitted</param>
        /// <param name="function">the caller's member name; supplied by the compiler when omitted</param>
        /// <param name="line">the caller's source line; supplied by the compiler when omitted</param>
        void Log(
            string entry,
            [CallerFilePath] string file = "(nofile)",
            [CallerMemberName] string function = "(nofunction)",
            [CallerLineNumber] int line = -1);
    }

    /// <summary>
    /// this is the information available to the external application about a
    /// "computer" in the cluster. This is really a slot in a cluster's resource manager;
    /// there may be multiple computers on the same host.
    /// </summary>
    public interface IComputer
    {
        /// <summary>
        /// the unique name of this resource
        /// </summary>
        string Name { get; }

        /// <summary>
        /// the URI for communicating with processes
        /// </summary>
        string ProcessServer { get; }

        /// <summary>
        /// the URI for fetching remote files
        /// </summary>
        string FileServer { get; }

        /// <summary>
        /// the directory for fetching local files
        /// </summary>
        string Directory { get; }

        /// <summary>
        /// the hostname this is running on, for data locality purposes
        /// </summary>
        string Host { get; }

        /// <summary>
        /// the cluster rack this is running on, for data locality purposes
        /// </summary>
        string RackName { get; }
    }

    /// <summary>
    /// this is the information available to the external application about a Dryad
    /// process that has been started on an IComputer. Basically just a UID to use in
    /// logging, and an interface for constructing references to files that it wrote
    /// </summary>
    public interface IProcess
    {
        /// <summary>
        /// a UID for this process
        /// </summary>
        string Id { get; }

        /// <summary>
        /// the directory for files written by this process
        /// </summary>
        string Directory { get; }
    }

    /// <summary>
    /// affinities can be described at different levels. We only use Computer and Rack
    /// for now
    /// </summary>
    /// <remarks>
    /// NOTE(review): there is no member called Computer in this enum; presumably Host is
    /// the level meant above -- confirm against the schedulers that consume it.
    /// </remarks>
    public enum AffinityResourceLevel
    {
        Core = 0,
        Socket,
        Host,
        Rack,
        Cluster
    }

    /// <summary>
    /// the external application supplies affinity resources when making scheduling requests
    /// </summary>
    public class AffinityResource
    {
        /// <summary>
        /// the granularity of this affinity; for now we only use Computer and Rack
        /// </summary>
        public AffinityResourceLevel level;

        /// <summary>
        /// the identifying string for the affinity. It is a UID with respect to the level,
        /// so identifies a unique host or rack
        /// </summary>
        public string locality;

        /// <summary>
        /// create a new affinity object
        /// </summary>
        /// <param name="l">the granularity of the affinity</param>
        /// <param name="place">the locality of the affinity</param>
        public AffinityResource(AffinityResourceLevel l, string place)
        {
            level = l;
            locality = place;
        }
    }

    /// <summary>
    /// a particular affinity that a process has to be scheduled somewhere
    /// </summary>
    public class Affinity
    {
        /// <summary>
        /// if this is true, the process may not run anywhere else
        /// </summary>
        public bool isHardContraint;

        /// <summary>
        /// this is a list of equally good places to run, e.g. the locations of
        /// all the replicas of a file
        /// </summary>
        public List<AffinityResource> affinities;

        /// <summary>
        /// this is a weight, corresponding to the amount of data the process
        /// would like to read from the locality in question
        /// </summary>
        public UInt64 weight;

        /// <summary>
        /// make a new affinity descriptor. The list of places starts out empty and is
        /// filled in by the caller.
        /// </summary>
        /// <param name="hc">true if this is a hard constraint and the process may not run anywhere else</param>
        /// <param name="w">weight corresponding to the amount of data the process would like to read from here</param>
        public Affinity(bool hc, UInt64 w)
        {
            isHardContraint = hc;
            affinities = new List<AffinityResource>();
            weight = w;
        }
    }

    /// <summary>
    /// state returned when the status of a key at a process is queried after the process has started running
    /// </summary>
    public enum ProcessState
    {
        /// <summary>
        /// the process is still running
        /// </summary>
        Running,

        /// <summary>
        /// the process has reported successful completion
        /// </summary>
        Completed,

        /// <summary>
        /// the process has exited without reporting successful completion
        /// </summary>
        Failed
    }

    /// <summary>
    /// state returned when a process has exited
    /// </summary>
    public enum ProcessExitState
    {
        /// <summary>
        /// the process was never started due to an error
        /// </summary>
        ScheduleFailed,

        /// <summary>
        /// contact was lost with the process
        /// </summary>
        StatusFailed,

        /// <summary>
        /// the process was canceled while scheduling
        /// </summary>
        ScheduleCanceled,

        /// <summary>
        /// the process had started, and has now exited
        /// </summary>
        ProcessExited
    }

    /// <summary>
    /// When the higher level of the software stack wants to schedule a process, it
    /// passes in an object that implements IProcessWatcher to receive updates in the
    /// form of callbacks as the process is queued, matched, scheduled, run, etc.
    /// </summary>
    public interface IProcessWatcher
    {
        /// <summary>
        /// OnQueued is called when the process has been placed in the scheduling queues.
        /// </summary>
        void OnQueued();

        /// <summary>
        /// OnMatched is called when the process has been matched to computer
        /// and is about to be scheduled there.
        /// </summary>
        /// <param name="computer">The computer the process has been scheduled on</param>
        /// <param name="timestamp">The UTC time on the local computer that the process was scheduled</param>
        void OnMatched(IComputer computer, long timestamp);

        /// <summary>
        /// OnCreated is called when the process has been created on the remote computer.
        /// </summary>
        /// <param name="timestamp">The UTC time on the local computer that the remote daemon responded to the process create request</param>
        void OnCreated(long timestamp);

        /// <summary>
        /// OnStarted is called when the process has started running on the remote computer.
        /// </summary>
        /// <param name="timestamp">The UTC time on the remote computer that the process started running</param>
        void OnStarted(long timestamp);

        /// <summary>
        /// OnExited is called when the process has finished, either because it could not be
        /// created (state=ScheduleFailed), because contact was lost with its daemon
        /// (state=StatusFailed) or because it has finished (state=ProcessExited). exitCode is
        /// the process exit code
        /// </summary>
        /// <param name="state">How far through scheduling the process got</param>
        /// <param name="timestamp">The UTC time on the remote computer that the process stopped running, or on the local computer
        /// if we lost contact with the remote daemon</param>
        /// <param name="exitCode">The exit code of the process if it was started, or 1 otherwise</param>
        /// <param name="errorText">A description of the error if the process didn't exit cleanly</param>
        void OnExited(ProcessExitState state, long timestamp, int exitCode, string errorText);
    }

    /// <summary>
    /// when the application wants to learn the status of a key on a running process, it passes in an
    /// IProcessKeyStatus object identifying the key being queried, and including a callback method
    /// that is called when the status is known
    /// </summary>
    public interface IProcessKeyStatus
    {
        /// <summary>
        /// the key to query at the process, filled in by the application
        /// </summary>
        /// <returns>the key the application wants to query</returns>
        string GetKey();

        /// <summary>
        /// the heartbeat timeout, filled in by the application
        /// </summary>
        /// <returns>how long to block waiting for the key to change before returning its status.
        /// OnCompleted will be called either when the version increases, or the timeout expires.</returns>
        int GetTimeout();

        /// <summary>
        /// the last known version of the key, filled in by the application
        /// </summary>
        /// <returns>the last version of the key seen by the application. OnCompleted will be called either
        /// when the version increases, or the timeout expires.</returns>
        UInt64 GetVersion();

        /// <summary>
        /// called by the cluster interface when the status query completes
        /// </summary>
        /// <param name="newVersion">the new version of the key at the remote process</param>
        /// <param name="statusData">the value of the key at the remote process</param>
        /// <param name="processExitCode">the exit code if the process has finished, or 259 (STILL_ACTIVE)</param>
        /// <param name="errorMessage">a descriptive message if something went wrong</param>
        void OnCompleted(UInt64 newVersion, byte[] statusData, int processExitCode, string errorMessage);
    }

    /// <summary>
    /// when the application wants to set a command key on a running process, it passes in an
    /// IProcessCommand object identifying the key being set, and including a callback method
    /// that is called when the RPC completes
    /// </summary>
    public interface IProcessCommand
    {
        /// <summary>
        /// the key to set at the process, filled in by the application
        /// </summary>
        /// <returns>the key the application wants to set</returns>
        string GetKey();

        /// <summary>
        /// a human-friendly summary of the value being set, filled in by the application
        /// </summary>
        /// <returns>summary of the value being set</returns>
        string GetShortStatus();

        /// <summary>
        /// the value to set, filled in by the application
        /// </summary>
        /// <returns>the value being set</returns>
        byte[] GetPayload();

        /// <summary>
        /// called by the cluster interface when the command set completes
        /// </summary>
        /// <param name="reason">null on success, or a descriptive error if there was a problem</param>
        void OnCompleted(string reason);
    }

    /// <summary>
    /// the cluster-level operations: starting and stopping the connection, listing
    /// computers, building file URIs, and creating, scheduling, canceling, querying and
    /// commanding processes
    /// </summary>
    public interface ICluster
    {
        /// <summary>
        /// starts up the cluster. Blocks until the application is ready to proceed
        /// </summary>
        /// <returns>false if the cluster startup fails</returns>
        bool Start();

        /// <summary>
        /// shuts down the connection to the cluster during application exit
        /// </summary>
        void Stop();

        /// <summary>
        /// retrieve a list of computers currently available in the cluster. The list
        /// may change as failures occur, or the cluster elastically changes the resource
        /// allocation of the application
        /// </summary>
        /// <returns>the computers currently available in the cluster</returns>
        List<IComputer> GetComputers();

        /// <summary>
        /// get a Uri to read a file from a computer running on the same host
        /// </summary>
        /// <param name="computer">the computer that wrote the file</param>
        /// <param name="directory">the directory holding the file (undocumented in the original;
        /// presumably the Directory of the process that wrote it -- confirm)</param>
        /// <param name="fileName">the leafname of the file</param>
        /// <param name="compressionMode">an integer compression mode to put in the query part of the Uri</param>
        /// <returns>a uri that identifies the file locally</returns>
        string GetLocalFilePath(IComputer computer, string directory, string fileName, int compressionMode);

        /// <summary>
        /// get a Uri to read a file from a remote computer
        /// </summary>
        /// <param name="computer">the computer that wrote the file</param>
        /// <param name="directory">the directory holding the file (undocumented in the original;
        /// presumably the Directory of the process that wrote it -- confirm)</param>
        /// <param name="fileName">the name of the file (undocumented in the original; presumably the
        /// leafname, as for GetLocalFilePath -- confirm)</param>
        /// <param name="compressionMode">an integer compression mode to put in the query part of the Uri</param>
        /// <returns>a uri that identifies the file remotely</returns>
        string GetRemoteFilePath(IComputer computer, string directory, string fileName, int compressionMode);

        /// <summary>
        /// generate a new Process object that will be used to schedule a process on a cluster
        /// computer
        /// </summary>
        /// <param name="watcher">the callback handler that the cluster will use to update
        /// the application on the process' lifecycle</param>
        /// <param name="commandLine">the command line to use to start the process on the remote
        /// computer</param>
        /// <returns>a handle to the new process</returns>
        IProcess NewProcess(IProcessWatcher watcher, string commandLine);

        /// <summary>
        /// request that a Process object, returned from NewProcess, be scheduled according
        /// to the supplied affinity hints and constraints
        /// </summary>
        /// <param name="process">the handle to the previously-created process</param>
        /// <param name="affinities">the hints and constraints about where the process should be run</param>
        void ScheduleProcess(IProcess process, List<Affinity> affinities);

        /// <summary>
        /// request that a process, previously created using NewProcess, be canceled, either before it is
        /// scheduled or after it starts running.
        /// </summary>
        /// <param name="process">the handle to the process</param>
        void CancelProcess(IProcess process);

        /// <summary>
        /// query the status of a key at a running process
        /// </summary>
        /// <param name="process">the handle to the process</param>
        /// <param name="status">a description of the key, and a callback when the query completes</param>
        void GetProcessStatus(IProcess process, IProcessKeyStatus status);

        /// <summary>
        /// set a command key at a running process
        /// </summary>
        /// <param name="process">the handle to the process</param>
        /// <param name="command">a description of the command, and callback when the RPC completes</param>
        void SetProcessCommand(IProcess process, IProcessCommand command);
    }

    /// <summary>
    /// static access point to a single shared NotHttpClient. Initialize creates the
    /// client once; the Create overloads then build requests from it.
    /// </summary>
    public class HttpClient
    {
        /// <summary>
        /// adapter that forwards Peloponnese logging calls to the application's ILogger
        /// </summary>
        private class PLogger : Microsoft.Research.Peloponnese.ILogger
        {
            private readonly ILogger logger;

            public PLogger(ILogger l)
            {
                logger = l;
            }

            /// <summary>
            /// pass the entry and the caller information straight through to the wrapped logger
            /// </summary>
            public void Log(
                string entry,
                [CallerFilePath] string file = "(nofile)",
                [CallerMemberName] string function = "(nofunction)",
                [CallerLineNumber] int line = -1)
            {
                logger.Log(entry, file, function, line);
            }

            /// <summary>
            /// intentionally empty: this adapter has nothing to do on Stop
            /// </summary>
            public void Stop()
            {
            }
        }

        // A dedicated private object is used as the monitor. The previous code locked on a
        // string literal; literals are interned, so any other code locking on an equal
        // literal would have contended on (or deadlocked against) the same monitor.
        private static readonly object initLock = new object();

        private static NotHttpClient client;

        /// <summary>
        /// create the shared client if it does not exist yet. Calls after the first
        /// successful one are no-ops, so the logger passed to later calls is ignored.
        /// </summary>
        /// <param name="logger">application logger that the client's log output is forwarded to</param>
        public static void Initialize(ILogger logger)
        {
            lock (initLock)
            {
                if (client == null)
                {
                    // constructor arguments are unchanged from the original; their meaning is
                    // defined by NotHttpClient, which is not visible in this file
                    client = new NotHttpClient(true, 1, 30000, new PLogger(logger));
                }
            }
        }

        /// <summary>
        /// create a request for the given uri using the shared client
        /// </summary>
        /// <param name="uri">the target of the request</param>
        /// <returns>the request created by the shared client</returns>
        /// <exception cref="InvalidOperationException">Initialize has not been called yet</exception>
        public static IHttpRequest Create(string uri)
        {
            return GetClient().CreateRequest(uri);
        }

        /// <summary>
        /// create a request for the given uri using the shared client
        /// </summary>
        /// <param name="uri">the target of the request</param>
        /// <returns>the request created by the shared client</returns>
        /// <exception cref="InvalidOperationException">Initialize has not been called yet</exception>
        public static IHttpRequest Create(Uri uri)
        {
            return GetClient().CreateRequest(uri);
        }

        /// <summary>
        /// fetch the shared client, failing with a descriptive error rather than a bare
        /// NullReferenceException when Initialize has not run
        /// </summary>
        private static NotHttpClient GetClient()
        {
            NotHttpClient current = client;
            if (current == null)
            {
                throw new InvalidOperationException(
                    "HttpClient.Initialize must be called before HttpClient.Create");
            }

            return current;
        }
    }

    // -----------------------
    // below are interfaces implemented by schedulers
    // -----------------------

    /// <summary>
    /// the handle for a process used internally by a scheduler
    /// </summary>
    public interface ISchedulerProcess
    {
        /// <summary>
        /// a unique ID assigned by the scheduler to the process
        /// </summary>
        string Id { get; }
    }

    /// <summary>
    /// the method called by the scheduler when a process is ready to be run on the cluster. When the returned
    /// Task completes the scheduler reclaims the resource that the process was using.
    /// </summary>
    /// <param name="computer">The location where the process has been scheduled or null if there was a scheduling error</param>
    /// <param name="processId">A unique integer ID for the process at the computer or -1 if there was a scheduling error</param>
    /// <param name="interrupt">This Task completes if the computer is being shut down in which case
    /// RunProcess should return early, or null if there was a scheduling error</param>
    /// <param name="errorReason">null if the process was scheduled, otherwise an error explaining the reason it wasn't</param>
    /// <returns>a Task that completes when the process finishes. This should be immediately returned complete if there was a
    /// scheduling error</returns>
    public delegate Task RunProcess(IComputer computer, int processId, Task interrupt, string errorReason);

    /// <summary>
    /// the interface implemented by a Dryad scheduler
    /// </summary>
    public interface IScheduler
    {
        /// <summary>
        /// Start the scheduler.
        /// </summary>
        /// <returns>true if the scheduler started successfully</returns>
        bool Start();

        /// <summary>
        /// get a snapshot of the available computers in the cluster. This may change later
        /// due to failures or elastic resource allocation.
        /// </summary>
        /// <returns>the computers available when the snapshot was taken</returns>
        List<IComputer> GetComputers();

        /// <summary>
        /// get a computer that is currently running on a given host, or null if there isn't one
        /// </summary>
        /// <param name="host">the hostname to look up a computer on</param>
        /// <returns>computer at the host, or null if there isn't one</returns>
        IComputer GetComputerAtHost(string host);

        /// <summary>
        /// get a handle to a new process that can be scheduled later
        /// </summary>
        /// <returns>the handle</returns>
        ISchedulerProcess NewProcess();

        /// <summary>
        /// add a process to the scheduling queues, along with affinity information about where it would
        /// prefer to run and a callback that is triggered when the process has been scheduled
        /// </summary>
        /// <param name="process">a handle for the process, created earlier using NewProcess</param>
        /// <param name="affinities">a description of the hints/constraints about where the process should run</param>
        /// <param name="onScheduled">a callback that is invoked when the process has been scheduled, or if a
        /// scheduling error occurs</param>
        void ScheduleProcess(ISchedulerProcess process, List<Affinity> affinities, RunProcess onScheduled);

        /// <summary>
        /// cancel the scheduling of a process. This will trigger the onScheduled callback if it has not already
        /// been sent, otherwise it does nothing.
        /// </summary>
        /// <param name="process">a handle to the process to be canceled</param>
        void CancelProcess(ISchedulerProcess process);

        /// <summary>
        /// shut down the scheduler
        /// </summary>
        void Stop();
    }

    /// <summary>
    /// a factory managing available schedulers
    /// </summary>
    public class Factory
    {
        /// <summary>
        /// delegate to create a scheduler
        /// </summary>
        /// <param name="logger">handle to the application logging interface</param>
        /// <returns>a new scheduler</returns>
        public delegate IScheduler MakeFunction(ILogger logger);

        /// <summary>
        /// table of registered schedulers each associated with a string
        /// </summary>
        private static readonly Dictionary<string, MakeFunction> registrations;

        /// <summary>
        /// static initializer
        /// </summary>
        static Factory()
        {
            registrations = new Dictionary<string, MakeFunction>();
        }

        /// <summary>
        /// called by a scheduler dll to register itself with the factory. The table is a
        /// plain Dictionary with no locking in this class.
        /// </summary>
        /// <param name="type">string used to identify the scheduler</param>
        /// <param name="factory">factory function to make a concrete instance of the scheduler</param>
        /// <exception cref="ArgumentNullException">type is null</exception>
        /// <exception cref="ArgumentException">a scheduler is already registered under type</exception>
        public static void Register(string type, MakeFunction factory)
        {
            // Add (rather than the indexer) is deliberate here: a duplicate registration
            // throws instead of silently replacing the earlier factory
            registrations.Add(type, factory);
        }

        /// <summary>
        /// used internally to create a scheduler
        /// </summary>
        /// <param name="type">string that the scheduler registered with</param>
        /// <param name="logger">handle to the application logging interface</param>
        /// <returns>a concrete scheduler</returns>
        /// <exception cref="ApplicationException">no scheduler has been registered under type</exception>
        internal static IScheduler CreateScheduler(string type, ILogger logger)
        {
            MakeFunction factory;
            if (!registrations.TryGetValue(type, out factory))
            {
                throw new ApplicationException("Unknown scheduler type " + type);
            }

            return factory(logger);
        }
    }
}