/*
Copyright (c) Microsoft Corporation
All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0
THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.
See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading.Tasks;
using Microsoft.Research.Peloponnese.NotHttpClient;
namespace Microsoft.Research.Dryad.ClusterInterface
{
/// <summary>
/// this is the connection to the application's logging interface, supplied
/// by the external application
/// </summary>
public interface ILogger
{
/// <summary>
/// record a single log entry. The three trailing parameters carry caller-info
/// attributes, so when a call site omits them the compiler substitutes the
/// caller's source file, member name and line number.
/// </summary>
/// <param name="entry">the text of the log entry</param>
/// <param name="file">source file of the call site; "(nofile)" if not supplied</param>
/// <param name="function">member name of the call site; "(nofunction)" if not supplied</param>
/// <param name="line">line number of the call site; -1 if not supplied</param>
void Log(
string entry,
[CallerFilePath] string file = "(nofile)",
[CallerMemberName] string function = "(nofunction)",
[CallerLineNumber] int line = -1);
}
/// <summary>
/// this is the information available to the external application about a
/// "computer" in the cluster. This is really a slot in a cluster's resource manager;
/// there may be multiple computers on the same host.
/// </summary>
public interface IComputer
{
/// <summary>
/// the unique name of this resource
/// </summary>
string Name { get; }
/// <summary>
/// the URI for communicating with processes
/// </summary>
string ProcessServer { get; }
/// <summary>
/// the URI for fetching remote files
/// </summary>
string FileServer { get; }
/// <summary>
/// the directory for fetching local files
/// </summary>
string Directory { get; }
/// <summary>
/// the hostname this is running on, for data locality purposes
/// </summary>
string Host { get; }
/// <summary>
/// the cluster rack this is running on, for data locality purposes
/// </summary>
string RackName { get; }
}
/// <summary>
/// this is the information available to the external application about a Dryad
/// process that has been started on an IComputer. Basically just a UID to use in
/// logging, and an interface for constructing references to files that it wrote
/// </summary>
public interface IProcess
{
/// <summary>
/// a UID for this process
/// </summary>
string Id { get; }
/// <summary>
/// the directory for files written by this process
/// </summary>
string Directory { get; }
}
/// <summary>
/// affinities can be described at different levels, listed here from the finest
/// granularity (Core) to the coarsest (Cluster). We only use Computer and Rack
/// for now.
/// NOTE(review): there is no member called "Computer" in this enum; the comment
/// presumably means Host -- confirm against the schedulers that consume it.
/// </summary>
public enum AffinityResourceLevel
{
// explicitly numbered from zero; the remaining members take consecutive values
Core = 0,
Socket,
Host,
Rack,
Cluster
}
/// <summary>
/// a single place (at some granularity) that a process would like to run; the
/// external application supplies these when making scheduling requests
/// </summary>
public class AffinityResource
{
    /// <summary>
    /// the granularity of this affinity; for now we only use Computer and Rack
    /// </summary>
    public AffinityResourceLevel level;

    /// <summary>
    /// the identifying string for the affinity. It is a UID with respect to the level,
    /// so identifies a unique host or rack
    /// </summary>
    public string locality;

    /// <summary>
    /// create a new affinity object
    /// </summary>
    /// <param name="l">the granularity of the affinity</param>
    /// <param name="place">the locality of the affinity</param>
    public AffinityResource(AffinityResourceLevel l, string place)
    {
        // plain field stores; no validation is applied to either argument
        this.locality = place;
        this.level = l;
    }
}
/// <summary>
/// a particular affinity that a process has to be scheduled somewhere
/// </summary>
public class Affinity
{
    /// <summary>
    /// if this is true, the process may not run anywhere else.
    /// (The field name is misspelled, but it is public and so is left unchanged
    /// to stay compatible with existing callers.)
    /// </summary>
    public bool isHardContraint;

    /// <summary>
    /// this is a list of equally good places to run, e.g. the locations of
    /// all the replicas of a file. The constructor creates it empty; callers
    /// add the AffinityResource entries afterwards.
    /// </summary>
    public List<AffinityResource> affinities;

    /// <summary>
    /// this is a weight, corresponding to the amount of data the process
    /// would like to read from the locality in question
    /// </summary>
    public UInt64 weight;

    /// <summary>
    /// make a new affinity descriptor with an empty list of places
    /// </summary>
    /// <param name="hc">true if this is a hard constraint and the process may not run anywhere else</param>
    /// <param name="w">weight corresponding to the amount of data the process would like to read from here</param>
    public Affinity(bool hc, UInt64 w)
    {
        isHardContraint = hc;
        // the element type must be spelled out: System.Collections.Generic has no
        // non-generic List, so "new List()" does not compile
        affinities = new List<AffinityResource>();
        weight = w;
    }
}
/// <summary>
/// state returned when the status of a key at a process is queried after the
/// process has started running
/// </summary>
public enum ProcessState
{
/// <summary>
/// the process is still running
/// </summary>
Running,
/// <summary>
/// the process has reported successful completion
/// </summary>
Completed,
/// <summary>
/// the process has exited without reporting successful completion
/// </summary>
Failed
}
/// <summary>
/// state returned when a process has exited; it records how far through
/// scheduling the process got
/// </summary>
public enum ProcessExitState
{
/// <summary>
/// the process was never started due to an error
/// </summary>
ScheduleFailed,
/// <summary>
/// contact was lost with the process
/// </summary>
StatusFailed,
/// <summary>
/// the process was canceled while scheduling
/// </summary>
ScheduleCanceled,
/// <summary>
/// the process had started, and has now exited
/// </summary>
ProcessExited
}
/// <summary>
/// When the higher level of the software stack wants to schedule a process, it
/// passes in an object that implements IProcessWatcher to receive updates in the
/// form of callbacks as the process is queued, matched, scheduled, run, etc.
/// </summary>
public interface IProcessWatcher
{
/// <summary>
/// OnQueued is called when the process has been placed in the scheduling queues.
/// </summary>
void OnQueued();
/// <summary>
/// OnMatched is called when the process has been matched to a computer
/// and is about to be scheduled there.
/// </summary>
/// <param name="computer">The computer the process has been scheduled on</param>
/// <param name="timestamp">The UTC time on the local computer that the process was scheduled</param>
void OnMatched(IComputer computer, long timestamp);
/// <summary>
/// OnCreated is called when the process has been created on the remote computer.
/// </summary>
/// <param name="timestamp">The UTC time on the local computer that the remote daemon responded to the process create request</param>
void OnCreated(long timestamp);
/// <summary>
/// OnStarted is called when the process has started running on the remote computer.
/// </summary>
/// <param name="timestamp">The UTC time on the remote computer that the process started running</param>
void OnStarted(long timestamp);
/// <summary>
/// OnExited is called when the process has finished, either because it could not be
/// created (state=ScheduleFailed), because contact was lost with its daemon
/// (state=StatusFailed) or because it has finished (state=ProcessExited). exitCode is
/// the process exit code.
/// NOTE(review): ProcessExitState also declares ScheduleCanceled, which is not
/// mentioned here; presumably it is reported through this callback too -- confirm.
/// </summary>
/// <param name="state">How far through scheduling the process got</param>
/// <param name="timestamp">The UTC time on the remote computer that the process stopped running, or on the local computer
/// if we lost contact with the remote daemon</param>
/// <param name="exitCode">The exit code of the process if it was started, or 1 otherwise</param>
/// <param name="errorText">A description of the error if the process didn't exit cleanly</param>
void OnExited(ProcessExitState state, long timestamp, int exitCode, string errorText);
}
/// <summary>
/// when the application wants to learn the status of a key on a running process, it passes in an
/// IProcessKeyStatus object identifying the key being queried, and including a callback method
/// that is called when the status is known
/// </summary>
public interface IProcessKeyStatus
{
/// <summary>
/// the key to query at the process, filled in by the application
/// </summary>
/// <returns>the key the application wants to query</returns>
string GetKey();
/// <summary>
/// the heartbeat timeout, filled in by the application
/// </summary>
/// <returns>how long to block waiting for the key to change before returning its status.
/// OnCompleted will be called either when the version increases, or the timeout expires.
/// NOTE(review): the time unit is not stated in this file -- confirm with the implementation.</returns>
int GetTimeout();
/// <summary>
/// the last known version of the key, filled in by the application
/// </summary>
/// <returns>the last version of the key seen by the application. OnCompleted will be called either
/// when the version increases, or the timeout expires.</returns>
UInt64 GetVersion();
/// <summary>
/// called by the cluster interface when the status query completes
/// </summary>
/// <param name="newVersion">the new version of the key at the remote process</param>
/// <param name="statusData">the value of the key at the remote process</param>
/// <param name="processExitCode">the exit code if the process has finished, or 259 (STILL_ACTIVE)</param>
/// <param name="errorMessage">a descriptive message if something went wrong</param>
void OnCompleted(UInt64 newVersion, byte[] statusData, int processExitCode, string errorMessage);
}
/// <summary>
/// when the application wants to set a command key on a running process, it passes in an
/// IProcessCommand object identifying the key being set, and including a callback method
/// that is called when the RPC completes
/// </summary>
public interface IProcessCommand
{
/// <summary>
/// the key to set at the process, filled in by the application
/// </summary>
/// <returns>the key the application wants to set</returns>
string GetKey();
/// <summary>
/// a human-friendly summary of the value being set, filled in by the application
/// </summary>
/// <returns>summary of the value being set</returns>
string GetShortStatus();
/// <summary>
/// the value to set, filled in by the application
/// </summary>
/// <returns>the value being set</returns>
byte[] GetPayload();
/// <summary>
/// called by the cluster interface when the command set completes
/// </summary>
/// <param name="reason">null on success, or a descriptive error if there was a problem</param>
void OnCompleted(string reason);
}
/// <summary>
/// the interface the external application uses to drive a cluster: starting and
/// stopping the connection, listing computers, building file references, and
/// creating, scheduling, canceling, querying and commanding processes
/// </summary>
public interface ICluster
{
    /// <summary>
    /// starts up the cluster. Blocks until the application is ready to proceed
    /// </summary>
    /// <returns>false if the cluster startup fails</returns>
    bool Start();

    /// <summary>
    /// shuts down the connection to the cluster during application exit
    /// </summary>
    void Stop();

    /// <summary>
    /// retrieve a list of computers currently available in the cluster. The list
    /// may change as failures occur, or the cluster elastically changes the resource
    /// allocation of the application
    /// </summary>
    /// <returns>the computers currently available in the cluster</returns>
    List<IComputer> GetComputers();

    /// <summary>
    /// get a Uri to read a file from a computer running on the same host
    /// </summary>
    /// <param name="computer">the computer that wrote the file</param>
    /// <param name="directory">the directory holding the file -- presumably the
    /// Directory of the IProcess that wrote it; confirm with the implementation</param>
    /// <param name="fileName">the leafname of the file</param>
    /// <param name="compressionMode">an integer compression mode to put in the query part of the Uri</param>
    /// <returns>a uri that identifies the file locally</returns>
    string GetLocalFilePath(IComputer computer, string directory, string fileName, int compressionMode);

    /// <summary>
    /// get a Uri to read a file from a remote computer
    /// </summary>
    /// <param name="computer">the computer that wrote the file</param>
    /// <param name="directory">the directory holding the file -- presumably the
    /// Directory of the IProcess that wrote it; confirm with the implementation</param>
    /// <param name="fileName">the leafname of the file</param>
    /// <param name="compressionMode">an integer compression mode to put in the query part of the Uri</param>
    /// <returns>a uri that identifies the file remotely</returns>
    string GetRemoteFilePath(IComputer computer, string directory, string fileName, int compressionMode);

    /// <summary>
    /// generate a new Process object that will be used to schedule a process on a cluster
    /// computer
    /// </summary>
    /// <param name="watcher">the callback handler that the cluster will use to update
    /// the application on the process' lifecycle</param>
    /// <param name="commandLine">the command line to use to start the process on the remote
    /// computer</param>
    /// <returns>a handle to the new process</returns>
    IProcess NewProcess(IProcessWatcher watcher, string commandLine);

    /// <summary>
    /// request that a Process object, returned from NewProcess, be scheduled according
    /// to the supplied affinity hints and constraints
    /// </summary>
    /// <param name="process">the handle to the previously-created process</param>
    /// <param name="affinities">the hints and constraints about where the process should be run</param>
    void ScheduleProcess(IProcess process, List<Affinity> affinities);

    /// <summary>
    /// request that a process, previously created using NewProcess, be canceled, either before it is
    /// scheduled or after it starts running.
    /// </summary>
    /// <param name="process">the handle to the process</param>
    void CancelProcess(IProcess process);

    /// <summary>
    /// query the status of a key at a running process
    /// </summary>
    /// <param name="process">the handle to the process</param>
    /// <param name="status">a description of the key, and a callback when the query completes</param>
    void GetProcessStatus(IProcess process, IProcessKeyStatus status);

    /// <summary>
    /// set a command key at a running process
    /// </summary>
    /// <param name="process">the handle to the process</param>
    /// <param name="command">a description of the command, and callback when the RPC completes</param>
    void SetProcessCommand(IProcess process, IProcessCommand command);
}
/// <summary>
/// process-wide access point to a single shared NotHttpClient. Initialize must be
/// called once (further calls are ignored) before any request is created.
/// </summary>
public class HttpClient
{
    /// <summary>
    /// adapter that lets the application's ILogger be handed to code expecting
    /// the Peloponnese ILogger
    /// </summary>
    private class PLogger : Microsoft.Research.Peloponnese.ILogger
    {
        private readonly ILogger logger;

        public PLogger(ILogger l)
        {
            logger = l;
        }

        /// <summary>
        /// forward the entry, with the call-site information it arrived with,
        /// to the wrapped application logger
        /// </summary>
        public void Log(
            string entry,
            [CallerFilePath] string file = "(nofile)",
            [CallerMemberName] string function = "(nofunction)",
            [CallerLineNumber] int line = -1)
        {
            logger.Log(entry, file, function, line);
        }

        /// <summary>
        /// nothing to do: the wrapped logger exposes no shutdown operation.
        /// NOTE(review): presumably required by the Peloponnese ILogger interface -- confirm.
        /// </summary>
        public void Stop()
        {
        }
    }

    // A dedicated private object is used as the monitor. The previous code locked
    // on a string literal; literals are interned, so any other code locking on an
    // equal literal would have contended for the very same monitor.
    private static readonly object initLock = new object();

    // the shared client; null until Initialize has run
    private static NotHttpClient client;

    /// <summary>
    /// create the shared client if it does not exist yet. Only the first call has
    /// any effect; the logger passed to later calls is ignored.
    /// </summary>
    /// <param name="logger">application logger that the client's log output is forwarded to</param>
    public static void Initialize(ILogger logger)
    {
        lock (initLock)
        {
            if (client == null)
            {
                // NOTE(review): the meaning of (true, 1, 30000) is defined by the
                // NotHttpClient constructor, which is not visible here -- confirm.
                client = new NotHttpClient(true, 1, 30000, new PLogger(logger));
            }
        }
    }

    /// <summary>
    /// create a request for the given address using the shared client
    /// </summary>
    /// <param name="uri">the address to contact</param>
    /// <returns>the new request</returns>
    /// <exception cref="InvalidOperationException">Initialize has not been called</exception>
    public static IHttpRequest Create(string uri)
    {
        return GetClient().CreateRequest(uri);
    }

    /// <summary>
    /// create a request for the given address using the shared client
    /// </summary>
    /// <param name="uri">the address to contact</param>
    /// <returns>the new request</returns>
    /// <exception cref="InvalidOperationException">Initialize has not been called</exception>
    public static IHttpRequest Create(Uri uri)
    {
        return GetClient().CreateRequest(uri);
    }

    /// <summary>
    /// return the shared client, failing with a descriptive error instead of a
    /// bare NullReferenceException when Initialize was never called
    /// </summary>
    private static NotHttpClient GetClient()
    {
        NotHttpClient current = client;
        if (current == null)
        {
            throw new InvalidOperationException(
                "HttpClient.Initialize must be called before HttpClient.Create");
        }
        return current;
    }
}
// -----------------------
// below are interfaces implemented by schedulers
// -----------------------
/// <summary>
/// the handle for a process used internally by a scheduler
/// </summary>
public interface ISchedulerProcess
{
/// <summary>
/// a unique ID assigned by the scheduler to the process
/// </summary>
string Id { get; }
}
/// <summary>
/// the method called by the scheduler when a process is ready to be run on the cluster. When the returned
/// Task completes the scheduler reclaims the resource that the process was using.
/// </summary>
/// <param name="computer">The location where the process has been scheduled or null if there was a scheduling error</param>
/// <param name="processId">A unique integer ID for the process at the computer or -1 if there was a scheduling error</param>
/// <param name="interrupt">This Task completes if the computer is being shut down in which case
/// RunProcess should return early, or null if there was a scheduling error</param>
/// <param name="errorReason">null if the process was scheduled, otherwise an error explaining the reason it wasn't</param>
/// <returns>a Task that completes when the process finishes. This should be immediately returned complete if there was a
/// scheduling error</returns>
public delegate Task RunProcess(IComputer computer, int processId, Task interrupt, string errorReason);
/// <summary>
/// the interface implemented by a Dryad scheduler
/// </summary>
public interface IScheduler
{
    /// <summary>
    /// Start the scheduler.
    /// </summary>
    /// <returns>true if the scheduler started successfully</returns>
    bool Start();

    /// <summary>
    /// get a snapshot of the available computers in the cluster. This may change later
    /// due to failures or elastic resource allocation.
    /// </summary>
    /// <returns>the computers available at the time of the call</returns>
    List<IComputer> GetComputers();

    /// <summary>
    /// get a computer that is currently running on a given host, or null if there isn't one
    /// </summary>
    /// <param name="host">the hostname to look up a computer on</param>
    /// <returns>computer at the host, or null if there isn't one</returns>
    IComputer GetComputerAtHost(string host);

    /// <summary>
    /// get a handle to a new process that can be scheduled later
    /// </summary>
    /// <returns>the handle</returns>
    ISchedulerProcess NewProcess();

    /// <summary>
    /// add a process to the scheduling queues, along with affinity information about where it would
    /// prefer to run and a callback that is triggered when the process has been scheduled
    /// </summary>
    /// <param name="process">a handle for the process, created earlier using NewProcess</param>
    /// <param name="affinities">a description of the hints/constraints about where the process should run</param>
    /// <param name="onScheduled">a callback that is invoked when the process has been scheduled, or if a
    /// scheduling error occurs</param>
    void ScheduleProcess(ISchedulerProcess process, List<Affinity> affinities, RunProcess onScheduled);

    /// <summary>
    /// cancel the scheduling of a process. This will trigger the onScheduled callback if it has not already
    /// been sent, otherwise it does nothing.
    /// </summary>
    /// <param name="process">a handle to the process to be canceled</param>
    void CancelProcess(ISchedulerProcess process);

    /// <summary>
    /// shut down the scheduler
    /// </summary>
    void Stop();
}
/// <summary>
/// a factory managing available schedulers
/// </summary>
public class Factory
{
    /// <summary>
    /// delegate to create a scheduler
    /// </summary>
    /// <param name="logger">handle to the application logging interface</param>
    /// <returns>a new scheduler</returns>
    public delegate IScheduler MakeFunction(ILogger logger);

    /// <summary>
    /// table of registered schedulers, each associated with the string it registered
    /// under. Access is not synchronized.
    /// </summary>
    private static readonly Dictionary<string, MakeFunction> registrations =
        new Dictionary<string, MakeFunction>();

    /// <summary>
    /// called by a scheduler dll to register itself with the factory
    /// </summary>
    /// <param name="type">string used to identify the scheduler</param>
    /// <param name="factory">factory function to make a concrete instance of the scheduler</param>
    /// <exception cref="ArgumentException">a scheduler is already registered under
    /// <paramref name="type"/> (raised by Dictionary.Add)</exception>
    /// <exception cref="ArgumentNullException"><paramref name="type"/> is null
    /// (raised by Dictionary.Add)</exception>
    public static void Register(string type, MakeFunction factory)
    {
        // Add, not the indexer: a duplicate registration is reported rather than
        // silently replacing the earlier one
        registrations.Add(type, factory);
    }

    /// <summary>
    /// used internally to create a scheduler
    /// </summary>
    /// <param name="type">string that the scheduler registered with</param>
    /// <param name="logger">handle to the application logging interface</param>
    /// <returns>a concrete scheduler</returns>
    /// <exception cref="ApplicationException">no scheduler was registered under
    /// <paramref name="type"/></exception>
    internal static IScheduler CreateScheduler(string type, ILogger logger)
    {
        MakeFunction factory;
        if (!registrations.TryGetValue(type, out factory))
        {
            throw new ApplicationException("Unknown scheduler type " + type);
        }

        return factory(logger);
    }
}
}