Dryad/xcompute_managed/VertexScheduler.cs

1252 lines
52 KiB
C#

/*
Copyright (c) Microsoft Corporation
All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0
THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.
See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.
*/
namespace Microsoft.Research.Dryad
{
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Globalization;
using System.Threading;
using System.Diagnostics;
using Microsoft.Research.Dryad;
public class VertexScheduler
{
// Table of vertex processes known to this scheduler, keyed by process id; supplied through the constructor.
private ProcessTable processTable = null;
// Dispatchers that are currently candidates for running processes.
private DispatcherPool dispatcherPool = new DispatcherPool();
// Dispatchers that faulted on a communication error and are waiting to be retried (see OnDispatcherFaulted).
private DispatcherPool badDispatcherPool = new DispatcherPool();
// Schedule requests that could not be placed on a node when they were submitted.
private RequestPool requestPool = new RequestPool();
private ISchedulerHelper schedulerHelper = SchedulerHelperFactory.GetInstance();
// Host for the vertex callback service; created in the constructor, started in Initialize, stopped in Shutdown.
private VertexCallbackServiceHost callbackServiceHost;
private JobStatus jobStatus = null;
// Base address of the callback service, built in the constructor from the host name and processId.
private string baseUri;
// baseUri plus the callback service name; handed to dispatchers as the reply address.
private string replyUri;
// Parsed from the CCP_JOBID environment variable in the constructor; 0 when it is missing or unparsable.
private int JobId = 0;
// Process id that the public lookup methods treat as "the current process"; no process table entry is consulted for it.
private const int currentProcess = 1;
// Parsed from the CCP_DRYADPROCID environment variable in the constructor; 0 when it is missing or unparsable.
private int processId = 0;
// Held while dispatchers are added to, removed from, or moved between the two dispatcher pools.
private object dispatcherChangeLock = new object();
#region Public Members
/// <summary>
/// Cancels a process. A request that is still waiting in the request pool is simply
/// removed from it; otherwise the process itself is asked to cancel.
/// </summary>
/// <param name="processId">Id of the process to cancel</param>
public void CancelScheduleProcess(int processId)
{
    DryadLogger.LogMethodEntry(processId);

    XComputeProcess target;
    bool known = processTable.TryGetValue(processId, out target);
    if (!known)
    {
        // Nothing to cancel: this id was never registered (or has already been removed)
        DryadLogger.LogWarning("Cancel process", "Attempt to cancel unknown process, id {0}", processId);
        return;
    }

    // Unassigned case: the request is still queued, so dropping it is all that is needed
    bool removedFromQueue = requestPool.Cancel(processId);
    if (removedFromQueue)
    {
        DryadLogger.LogInformation("Cancel process", "Process request removed from request pool for process id {0}", processId);
        return;
    }

    // Assigned case: let the process handle its own cancellation
    target.Cancel();
}
/// <summary>
/// Closes the handle for a vertex process. If the process has a dispatcher and has
/// completed, it is released at the node and removed from the process table; if it has
/// not completed yet, the close is recorded and deferred until the process exits.
/// </summary>
/// <param name="processId">Id of the process whose handle is being closed</param>
public void CloseVertexProcess(int processId)
{
    if (processId == currentProcess)
    {
        // No process table entry is maintained for the current process
        return;
    }

    XComputeProcess target;
    if (!processTable.TryGetValue(processId, out target))
    {
        DryadLogger.LogError(0, null, "Unknown process id {0}", processId);
        return;
    }

    lock (target.SyncRoot)
    {
        if (target.Dispatcher == null)
        {
            DryadLogger.LogInformation("Close vertex process", "Dispatcher is null for process id {0} - it was either unscheduled or the dispatcher faulted", processId);
            return;
        }

        if (target.CurrentState != ProcessState.Completed)
        {
            // The GM may cancel a process and close its handle right away, before the
            // state change caused by the cancellation has arrived here. Only record that
            // the handle was closed; ProcessExit checks this flag and performs the real
            // close, which avoids leaking a Dispatcher.
            DryadLogger.LogDebug("Close vertex process", "Closing handle for process id {0} in state {1} - delaying close until process exit", processId, target.CurrentState);
            target.HandleClosed = true;
            return;
        }

        try
        {
            target.Dispatcher.ReleaseProcess(processId);
        }
        finally
        {
            // The Graph Manager is done with the process, so drop it from the table
            // even if releasing it at the node failed
            processTable.Remove(processId);
        }
    }
}
/// <summary>
/// Registers a new process in the process table and moves it to the Unscheduled state.
/// </summary>
/// <param name="processId">Id to assign to the new process</param>
public void CreateVertexProcess(int processId)
{
    XComputeProcess created = new XComputeProcess(processId);

    // Register first, then change state, so the entry exists before the transition happens
    this.processTable.Add(processId, created);
    created.ChangeState(ProcessState.Unscheduled);
}
/// <summary>
/// Path for this scheduler's own process id, as computed by ProcessPathHelper.
/// </summary>
public string CurrentProcessLocalPath
{
    get
    {
        string localPath = ProcessPathHelper.ProcessPath(this.processId);
        return localPath;
    }
}
/// <summary>
/// Share path for this scheduler's own process id (GetProcessPath with no relative part).
/// </summary>
public string CurrentProcessRemotePath
{
    get
    {
        string remotePath = GetProcessPath(this.processId, null);
        return remotePath;
    }
}
/// <summary>
/// Returns the node names currently reported by the dispatcher pool, copied into an array.
/// </summary>
public string[] EnumerateProcessNodes()
{
    string[] nodeNames = dispatcherPool.Nodes.ToArray();
    return nodeNames;
}
/// <summary>
/// Returns the node a process is assigned to.
/// </summary>
/// <param name="processId">Id of the process to look up</param>
/// <returns>
/// The current host name for the current process, the process's assigned node if it is in
/// the process table, or null if the process id is unknown.
/// </returns>
public string GetAssignedNode(int processId)
{
    // TODO: Need to fix for local executor if it's supported again
    if (processId == currentProcess)
    {
        return AzureUtils.CurrentHostName;
    }

    // Single lookup: a ContainsKey check followed by the indexer could throw if the entry
    // were removed (e.g. by CloseVertexProcess) between the two calls.
    XComputeProcess proc;
    if (this.processTable.TryGetValue(processId, out proc))
    {
        return proc.AssignedNode;
    }

    return null;
}
/// <summary>
/// Returns the exit code recorded for a process. The process is looked up through the
/// process table indexer, so the id is expected to be present.
/// </summary>
/// <param name="processId">Id of the process to look up</param>
public uint GetExitCode(int processId)
{
    uint recordedCode = this.processTable[processId].ExitCode;
    return recordedCode;
}
/// <summary>
/// Builds the UNC path of a process's directory on its assigned node, optionally
/// extended with a relative path.
/// </summary>
/// <param name="processId">Id of the process whose path is wanted</param>
/// <param name="relativePath">Optional path appended below the process directory; null or empty to omit</param>
/// <returns>The path, or null if no node is known for the process</returns>
public string GetProcessPath(int processId, string relativePath)
{
    // TODO: Need to fix for local executor if it's supported again
    string node = GetAssignedNode(processId);
    if (String.IsNullOrEmpty(node))
    {
        return null;
    }

    // The path is machine-readable, so format the numeric ids with the invariant culture
    // (the same convention used when CCP_DRYADPROCID is written to the environment).
    string path = String.Format(CultureInfo.InvariantCulture, @"\\{0}\{1}\{2}\{3}\{4}", node, Constants.DscTempShare, Environment.UserName, this.JobId, processId);
    if (!String.IsNullOrEmpty(relativePath))
    {
        path += @"\" + relativePath;
    }
    return path;
}
/// <summary>
/// Returns the state of a process.
/// </summary>
/// <param name="processId">Id of the process to look up</param>
/// <returns>
/// Running for the current process, the recorded state for a process in the process
/// table, and Completed for an id that is not in the table.
/// </returns>
public ProcessState GetProcessState(int processId)
{
    if (processId == currentProcess)
    {
        return ProcessState.Running;
    }

    // Single lookup: avoids throwing if the entry is removed between a ContainsKey
    // check and the indexer access.
    XComputeProcess proc;
    if (this.processTable.TryGetValue(processId, out proc))
    {
        return proc.CurrentState;
    }

    // Unknown ids are reported as Completed
    return ProcessState.Completed;
}
/// <summary>
/// True when this scheduler's process id is 1.
/// </summary>
public bool IsGraphManager
{
    get
    {
        bool hasGraphManagerId = (processId == 1);
        return hasGraphManagerId;
    }
}
/// <summary>
/// True when this scheduler's process id is greater than 1.
/// </summary>
public bool IsVertex
{
    get
    {
        bool hasVertexId = (processId > 1);
        return hasVertexId;
    }
}
/// <summary>
/// True when this scheduler's process id is 0, which is the value the constructor
/// falls back to when CCP_DRYADPROCID is not set.
/// </summary>
public bool IsVertexRerun
{
    get
    {
        bool idNotAssigned = (processId == 0);
        return idNotAssigned;
    }
}
/// <summary>
/// The JobStatus object created by the constructor.
/// </summary>
public JobStatus JobStatus
{
    get
    {
        return this.jobStatus;
    }
}
/// <summary>
/// Registers a state change listener on a process. The process is looked up through
/// the process table indexer, so the id is expected to be present.
/// </summary>
/// <param name="processId">Id of the process to observe</param>
/// <param name="timeoutInterval">Timeout passed through to the listener registration</param>
/// <param name="targetState">State the listener is registered for</param>
/// <param name="handler">Handler passed through to the listener registration</param>
public void NotifyStateChange(int processId, long timeoutInterval, ProcessState targetState, StateChangeEventHandler handler)
{
    processTable[processId].AddStateChangeListener(
        targetState,
        timeoutInterval,
        handler);
}
/// <summary>
/// Applies a reported state change to a process. Unknown process ids are ignored.
/// </summary>
/// <param name="processId">Id of the process whose state changed</param>
/// <param name="newState">State the process changed to</param>
public void ProcessChangeState(int processId, ProcessState newState)
{
    XComputeProcess target;
    if (!this.processTable.TryGetValue(processId, out target))
    {
        return;
    }

    DryadLogger.LogDebug("Process Change State", "Process {0} changed to state {1}", processId, newState);

    if (newState != ProcessState.Running)
    {
        target.ChangeState(newState);
        return;
    }

    // The process must reach AssignedToNode before it moves to Running, or the GM gets
    // mildly confused; the transition is handed to a thread pool work item.
    ThreadPool.QueueUserWorkItem(new WaitCallback(target.TransitionToRunning));
}
/// <summary>
/// Reports that a process exited, for the case where the exit is not the result of a
/// faulted dispatcher.
/// </summary>
/// <param name="processId">Id of the process that exited</param>
/// <param name="exitCode">Exit code reported for the process</param>
public void ProcessExit(int processId, int exitCode)
{
    const bool dispatcherFaulted = false;
    ProcessExit(processId, exitCode, dispatcherFaulted);
}
/// <summary>
/// Reports whether a process has been cancelled.
/// </summary>
/// <param name="processId">Id of the process to look up</param>
/// <returns>The process's Cancelled flag, or false if the id is not in the process table</returns>
public bool ProcessCancelled(int processId)
{
    // Single lookup: avoids throwing if the entry is removed between a ContainsKey
    // check and the indexer access.
    XComputeProcess proc;
    if (processTable.TryGetValue(processId, out proc))
    {
        return proc.Cancelled;
    }
    return false;
}
/// <summary>
/// Schedules a process: it is started on a reserved dispatcher if one can be found,
/// or queued in the request pool if none is free but dispatchers exist.
/// </summary>
/// <param name="processId">Id of a process already present in the process table</param>
/// <param name="commandLine">Command line for the process</param>
/// <param name="softAffinities">Soft affinities passed to the schedule request</param>
/// <param name="hardAffinity">Hard affinity passed to the schedule request</param>
/// <param name="environment">Environment for the process; a new dictionary is created when null</param>
/// <returns>
/// True if scheduling was started or the request was queued; false if there are no
/// dispatchers at all or scheduling could not be started on the reserved dispatcher.
/// </returns>
public bool ScheduleProcess(int processId, string commandLine, List<SoftAffinity> softAffinities, string hardAffinity, StringDictionary environment)
{
    processTable[processId].SetIdAndVersion(commandLine);
    DryadLogger.LogInformation("Schedule process", "Internal ID {0} corresponds to vertex {1}.{2}", processId, processTable[processId].GraphManagerId, processTable[processId].GraphManagerVersion);
    DryadLogger.LogInformation("Schedule process", "Internal ID {0} has a command line of {1}", processId,
        commandLine);

    if (environment == null)
    {
        environment = new StringDictionary();
    }
    environment[Constants.jobManager] = AzureUtils.CurrentHostName;
    environment["CCP_DRYADPROCID"] = processId.ToString(CultureInfo.InvariantCulture);

    ScheduleProcessRequest request = new ScheduleProcessRequest(processId, commandLine, softAffinities, hardAffinity, environment);
    Dispatcher reserved = null;

    // Hold the request pool lock across "look for a node" and "queue the request", in
    // case a ProcessExit arrives after the search but before the request is queued.
    lock (requestPool.SyncRoot)
    {
        bool nodeFound = FindNodeForRequest(request, out reserved);
        if (!nodeFound)
        {
            if (dispatcherPool.Count <= 0)
            {
                DryadLogger.LogCritical(0, null, "No available dispatchers");
                return false;
            }

            DryadLogger.LogDebug("Schedule Process", "No nodes available, adding process {0} to request pool", processId);
            requestPool.Add(request);
            return true;
        }
    }

    // A dispatcher was reserved; start the request outside of the lock
    bool started = ScheduleProcess(request, reserved);
    if (!started)
    {
        processTable[processId].ChangeState(ProcessState.SchedulingFailed);
        reserved.Release();
    }
    return started;
}
/// <summary>
/// Registers a property listener for a process and forwards the set/get property
/// request to the process's dispatcher.
/// </summary>
/// <param name="processId">Id of the target process</param>
/// <param name="infos">Properties to set; only the first entry is used to register the listener</param>
/// <param name="blockOnLabel">Passed through to the dispatcher</param>
/// <param name="blockOnVersion">Passed through to the dispatcher</param>
/// <param name="maxBlockTime">Passed through to the dispatcher</param>
/// <param name="getPropLabel">Property to get; used to register the listener when infos is null or empty</param>
/// <param name="ProcessStatistics">Passed through to the dispatcher</param>
/// <param name="handler">Handler registered as the property listener</param>
/// <returns>
/// True if the dispatcher accepted the request; false if the process is unknown, both
/// infos and getPropLabel are empty, the process has no dispatcher, or the dispatcher
/// rejected the request.
/// </returns>
public bool SetGetProps(int processId, ProcessPropertyInfo[] infos, string blockOnLabel, ulong blockOnVersion, long maxBlockTime, string getPropLabel, bool ProcessStatistics, GetSetPropertyEventHandler handler)
{
    // Resolve the process once. Repeating the table lookup for every access could throw
    // if the entry were removed part-way through this method.
    XComputeProcess proc;
    if (!this.processTable.TryGetValue(processId, out proc))
    {
        DryadLogger.LogError(0, null, "process id {0} not found in process table", processId);
        return false;
    }

    if (infos != null && infos.Length > 0)
    {
        // Only add for the first property info since we only want to fire completion once per request
        proc.AddPropertyListener(infos[0].propertyLabel, infos[0].propertyVersion, handler);
    }
    else if (!String.IsNullOrEmpty(getPropLabel))
    {
        proc.AddPropertyListener(getPropLabel, 0, handler);
    }
    else
    {
        DryadLogger.LogError(0, null, "infos and getPropLabel both empty");
        return false;
    }

    lock (proc.SyncRoot)
    {
        if (proc.Dispatcher != null)
        {
            if (proc.Dispatcher.SetGetProps(replyUri, processId, infos, blockOnLabel, blockOnVersion, maxBlockTime, getPropLabel, ProcessStatistics))
            {
                return true;
            }
        }
    }

    // Keep returning error to GM and let its fault-tolerance kick in
    if (dispatcherPool.Count == 0)
    {
        DryadLogger.LogCritical(0, null, "All dispatchers are faulted.");
    }
    return false;
}
/// <summary>
/// Forwards the completion of a set/get property request to the process it belongs to.
/// An error is logged if the process id is not in the process table.
/// </summary>
/// <param name="processId">Id of the process the completion is for</param>
/// <param name="info">Process information passed through to the process</param>
/// <param name="propertyLabels">Property labels passed through to the process</param>
/// <param name="propertyVersions">Property versions passed through to the process</param>
public void SetGetPropsComplete(int processId, ProcessInfo info, string[] propertyLabels, ulong[] propertyVersions)
{
    XComputeProcess target;
    if (!processTable.TryGetValue(processId, out target))
    {
        DryadLogger.LogError(0, null, "process id {0} not found in process table", processId);
        return;
    }

    target.SetGetPropsComplete(info, propertyLabels, propertyVersions);
}
/// <summary>
/// When called from the GM, shuts down all the vertex services and closes the communication channels.
/// When called from the vertex host, closes the communication channel to the local vertex service.
/// A repeated call no longer fails on the scheduler helper, which is released by the first call.
/// </summary>
/// <param name="ShutdownCode">Code to pass to the vertex services. Currently unused.</param>
public void Shutdown(uint ShutdownCode)
{
    DryadLogger.LogMethodEntry(ShutdownCode);

    // The field is set to null below, so a second call must not dereference it blindly
    ISchedulerHelper helper = schedulerHelper;

    // If this is the GM, shut down each dispatcher in turn (synchronously) and log how long each took
    if (processId == 1)
    {
        // We no longer need to listen for task state changes
        if (helper != null)
        {
            helper.StopTaskMonitorThread();
        }

        lock (dispatcherPool.SyncRoot)
        {
            foreach (Dispatcher disp in dispatcherPool)
            {
                DryadLogger.LogDebug("Shutdown", "Calling Shutdown on dispatcher for node {0}", disp.NodeName);
                Stopwatch sw = Stopwatch.StartNew();
                try
                {
                    disp.Shutdown(0);
                    sw.Stop();
                }
                catch (Exception e)
                {
                    // Stop timing before logging so the reported duration covers only the Shutdown call.
                    // A failure on one dispatcher must not prevent the others from being shut down.
                    sw.Stop();
                    DryadLogger.LogError(0, e, "Exception calling Shutdown on dispatcher for node {0}", disp.NodeName);
                }
                DryadLogger.LogDebug("Shutdown", "Dispatcher.Shutdown took {0} ms", sw.ElapsedMilliseconds);
            }
        }
    }

    // Dispose the SchedulerHelper instance to clean up resources
    if (helper != null)
    {
        helper.Dispose();
        schedulerHelper = null;
    }

    // Clean out the dispatcher pool (this also disposes all dispatchers)
    dispatcherPool.Clear();

    // Stop the callback service
    callbackServiceHost.Stop();

    DryadLogger.LogMethodExit();
}
/// <summary>
/// Blocks the caller until a process reaches the target state or the timeout elapses.
/// </summary>
/// <param name="processId">Id of the process to wait on</param>
/// <param name="timeoutInterval">Timeout; divided by 10 and then interpreted as milliseconds</param>
/// <param name="targetState">State to wait for</param>
/// <returns>True if the wait event was signalled in time; false on timeout or if the process id is unknown</returns>
public bool WaitForStateChange(int processId, long timeoutInterval, ProcessState targetState)
{
    DryadLogger.LogDebug("Wait for state change", "Process id: {0}, targetState: {1}", processId, targetState);

    // Single lookup: avoids throwing if the entry is removed between a ContainsKey
    // check and the indexer access.
    XComputeProcess proc;
    if (!this.processTable.TryGetValue(processId, out proc))
    {
        DryadLogger.LogError(0, null, "process id {0} not found in process table", processId);
        return false;
    }

    // NOTE(review): the event is disposed when this method returns, but it stays registered
    // with the process after a timeout - confirm that a later state change cannot signal the
    // disposed event.
    using (ManualResetEvent waitEvent = new ManualResetEvent(false))
    {
        proc.AddStateChangeWaiter(targetState, waitEvent);
        // NOTE(review): confirm the unit of timeoutInterval; the division by 10 is kept as-is.
        return waitEvent.WaitOne(TimeSpan.FromMilliseconds(timeoutInterval / 10), false);
    }
}
#endregion
#region Private Members
/// <summary>
/// Creates the scheduler around an existing process table, reads the job and process ids
/// from the environment, and prepares (but does not start) the callback service host.
/// </summary>
/// <param name="table">Process table used by this scheduler</param>
private VertexScheduler(ProcessTable table)
{
    this.processTable = table;
    this.jobStatus = new JobStatus(schedulerHelper);

    // CCP_JOBID and CCP_DRYADPROCID are not set when the vertex rerun command is executed.
    // Both ids fall back to 0 so that a rerun outside of an HPC job can be detected later.
    int parsedJobId;
    string rawJobId = Environment.GetEnvironmentVariable("CCP_JOBID");
    this.JobId = Int32.TryParse(rawJobId, out parsedJobId) ? parsedJobId : 0;

    int parsedProcessId;
    string rawProcessId = Environment.GetEnvironmentVariable("CCP_DRYADPROCID");
    this.processId = Int32.TryParse(rawProcessId, out parsedProcessId) ? parsedProcessId : 0;

    this.baseUri = String.Format(Constants.vertexCallbackAddrFormat, AzureUtils.CurrentHostName, processId);
    this.replyUri = this.baseUri + Constants.vertexCallbackServiceName;
    this.callbackServiceHost = new VertexCallbackServiceHost(this);
}
/// <summary>
/// Builds a dispatcher for a node and tries to put it into the good dispatcher pool.
/// </summary>
/// <param name="taskid">HPC Task Id</param>
/// <param name="node">Name of the node the dispatcher talks to</param>
/// <param name="state">Task state at creation time (always Running now)</param>
/// <returns>The dispatcher that was added, or null when the good pool already holds a dispatcher for that node</returns>
private Dispatcher AddDispatcher(int taskid, string node, VertexTaskState state)
{
    VertexComputeNode computeNode = new VertexComputeNode();
    computeNode.instanceId = taskid;
    computeNode.ComputeNode = node;
    computeNode.State = state;

    Dispatcher created = new Dispatcher(schedulerHelper, computeNode);
    created.FaultedEvent += new DispatcherFaultedEventHandler(OnDispatcherFaulted);

    if (dispatcherPool.Add(created))
    {
        return created;
    }

    // The pool rejected it: a dispatcher for this node already exists
    created.Dispose();
    return null;
}
/// <summary>
/// Reserves the dispatcher for a node and picks the best queued request for it. A request
/// with a hard affinity for the node wins immediately; otherwise the runnable request
/// with the largest affinity weight is chosen, or the first runnable one when all weights
/// are zero. The chosen request is removed from the request pool.
/// </summary>
/// <param name="node">Name of the node to find work for</param>
/// <param name="req">The selected request, or null when nothing was selected</param>
/// <returns>
/// True if a request was selected (the node's dispatcher stays reserved); false if the
/// dispatcher could not be reserved or no request fits (the dispatcher is released).
/// </returns>
private bool FindRequestForNode(string node, out ScheduleProcessRequest req)
{
    req = null;
    ulong maxAffinity = 0;
    bool result = false;
    Dispatcher dispatcher = null;

    // Timers for the diagnostic line logged at the end: total time, time spent waiting
    // for the request pool lock, and time spent searching.
    Stopwatch swTotal = new Stopwatch();
    Stopwatch swSearch = new Stopwatch();
    Stopwatch swBlock = new Stopwatch();
    int requestCount = 0;

    swTotal.Start();
    if (dispatcherPool.TryReserveDispatcher(node, out dispatcher))
    {
        swBlock.Start();
        lock (requestPool.SyncRoot)
        {
            swBlock.Stop();
            swSearch.Start();
            requestCount = requestPool.Count;
            if (requestCount != 0)
            {
                foreach (ScheduleProcessRequest r in requestPool)
                {
                    // Skip any lingering processes which have been cancelled.
                    // Single lookup: a ContainsKey check followed by the indexer could throw
                    // if the process were removed from the table in between.
                    XComputeProcess queuedProc;
                    if (processTable.TryGetValue(r.Id, out queuedProc) && queuedProc.Cancelled)
                    {
                        continue;
                    }

                    if (r.MustRunOnNode(node))
                    {
                        // A hard affinity beats everything else; stop searching
                        req = r;
                        DryadLogger.LogDebug("Find Request for Node", "process {0} has hard affinity constraint for node {1}", req.Id, node);
                        break;
                    }
                    else if (r.CanRunOnNode(node))
                    {
                        ulong thisAffinity = r.GetAffinityWeightForNode(node);
                        if (thisAffinity == 0 && req == null)
                        {
                            // Fallback candidate: keeps its place unless a weighted request shows up
                            req = r;
                            DryadLogger.LogDebug("Find Request for Node", "Process {0} has 0 affinity constraint for node {1} but no other process has been selected yet", r.Id, node);
                        }
                        else if (thisAffinity > maxAffinity)
                        {
                            maxAffinity = thisAffinity;
                            req = r;
                            DryadLogger.LogDebug("Find Request for Node", "Process {0} with affinity constraint {1} for node {2} larger than previous max", r.Id, thisAffinity, node);
                        }
                    }
                }
            }
            swSearch.Stop();

            if (req != null)
            {
                requestPool.Remove(req);
                DryadLogger.LogDebug("Find Request for Node", "Found request {0} for node {1}", req.Id, node);
                result = true;
            }
            else
            {
                // Nothing to run: give the reservation back
                DryadLogger.LogDebug("Find Request for Node", "Did not find any requests for node {0}", node);
                dispatcher.Release();
                result = false;
            }
        }
    }
    swTotal.Stop();

    DryadLogger.LogInformation("Find Request for Node", "Searching {0} requests. Block {1} ms. Inner search {2} ms. Total elapsed time {3} ms.",
        requestCount, swBlock.ElapsedMilliseconds, swSearch.ElapsedMilliseconds, swTotal.ElapsedMilliseconds);
    return result;
}
/// <summary>
/// Tries to reserve a dispatcher for a request. A hard affinity allows only that node;
/// otherwise the soft affinities are tried in list order, followed by any dispatcher in
/// the pool whose node the request can run on.
/// </summary>
/// <param name="req">Request to place</param>
/// <param name="dispatcher">The reserved dispatcher on success</param>
/// <returns>True if a dispatcher was reserved for the request</returns>
private bool FindNodeForRequest(ScheduleProcessRequest req, out Dispatcher dispatcher)
{
    dispatcher = null;

    if (req.HardAffinity != null)
    {
        // Hard affinity: it is this node or nothing
        return dispatcherPool.TryReserveDispatcher(req.HardAffinity, out dispatcher);
    }

    // First try soft affinity in decreasing order (assumes Soft Affinity list in req is sorted descending by weight)
    // Keep a map of the nodes we've already tried, because Dryad adds each affinity twice
    // once for the node and once for the "pod".
    // Node names are identifiers, so they are compared with an ordinal, case-insensitive
    // comparer instead of a culture-sensitive ToUpper().
    Dictionary<string, bool> attemptedNodes = new Dictionary<string, bool>(StringComparer.OrdinalIgnoreCase);
    int count = 0;
    for (int i = 0; i < req.AffinityCount; i++)
    {
        string affinityNode = req.AffinityAt(i).Node;
        if (attemptedNodes.ContainsKey(affinityNode))
        {
            continue;
        }
        attemptedNodes.Add(affinityNode, true);
        count++;

        if (dispatcherPool.TryReserveDispatcher(affinityNode, out dispatcher))
        {
            DryadLogger.LogDebug("Find Node For Request", "process {0} satisfied affinity constraint: node {1}, weight {2}", req.Id, affinityNode, req.AffinityAt(i).Weight);
            return true;
        }
        DryadLogger.LogDebug("Find Node For Request", "process {0} did not satisfy affinity constraint: node {1}, weight {2}", req.Id, affinityNode, req.AffinityAt(i).Weight);
    }

    // If we get this far and count > 0, then we failed to satisfy the affinity constraints
    // log a message so we can more easily detect this situation
    if (count > 0)
    {
        DryadLogger.LogInformation("Find Node For Request", "process {0} failed to satisfy any of {1} affinity constraints", req.Id, count);
    }

    // Finally try any available node
    lock (dispatcherPool.SyncRoot)
    {
        foreach (Dispatcher d in dispatcherPool)
        {
            if (req.CanRunOnNode(d.NodeName) && d.Reserve())
            {
                dispatcher = d;
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Finishes setting up the scheduler according to its role. A vertex rerun does nothing;
/// the graph manager subscribes to vertex changes and waits for the tasks to be ready;
/// a vertex creates a dispatcher for the local vertex service. The callback service is
/// started for the latter two roles.
/// </summary>
private void Initialize()
{
    if (IsVertexRerun)
    {
        // Vertex rerun command is being executed, don't create any dispatchers
        return;
    }

    if (IsGraphManager)
    {
        VertexChangeEventHandler changeHandler = new VertexChangeEventHandler(OnVertexChanged);
        schedulerHelper.OnVertexChange += changeHandler;
        schedulerHelper.StartTaskMonitorThread();

        bool tasksReady = schedulerHelper.WaitForTasksReady();
        if (!tasksReady)
        {
            // No vertex nodes will be returned, which makes the graph manager abort
            DryadLogger.LogCritical(0, null, "Unable to begin job: too many vertex tasks failed");
            schedulerHelper.OnVertexChange -= changeHandler;
            dispatcherPool.Clear();
            return;
        }

        // TODO: we need to be able to turn this off
        //ThreadPool.QueueUserWorkItem(new WaitCallback(VertexMonitorThreadFunc));
    }
    else
    {
        // IsVertex: create a dispatcher for the local vertex service and add an entry
        // to the process table for the local process
        string rawTaskId = Environment.GetEnvironmentVariable(Constants.taskIdEnvVar);
        int taskId = Int32.Parse(rawTaskId);
        Dispatcher localDispatcher = AddDispatcher(taskId, "localhost", VertexTaskState.Running);

        XComputeProcess localProcess = new XComputeProcess(processId);
        lock (localProcess.SyncRoot)
        {
            localProcess.Dispatcher = localDispatcher;
        }
        this.processTable.Add(processId, localProcess);
    }

    callbackServiceHost.Start(this.baseUri, this.schedulerHelper);
}
/// <summary>
/// Timer callback for a dispatcher that faulted on a communication error (as opposed to
/// a task failure). Such a dispatcher is parked in the bad dispatcher pool with a retry
/// timer; when the timer fires, this method replaces it with a fresh dispatcher for the
/// same node and looks for work for that node.
/// </summary>
/// <param name="state">The dispatcher to be retried</param>
private void RetryFaultedDispatcher(object state)
{
    DryadLogger.LogMethodEntry();

    Dispatcher faulted = state as Dispatcher;
    if (faulted == null)
    {
        DryadLogger.LogWarning("Retry faulted dispatcher", "state parameter not a valid dispatcher");
        DryadLogger.LogMethodExit();
        return;
    }

    DryadLogger.LogDebug("Retry faulted dispatcher", "Creating new dispatcher for node {0}", faulted.NodeName);

    Dispatcher replacement = null;
    lock (dispatcherChangeLock)
    {
        // Bring up the replacement first, then retire the old dispatcher
        replacement = AddDispatcher(faulted.TaskId, faulted.NodeName, VertexTaskState.Running);
        badDispatcherPool.Remove(faulted);
        faulted.Dispose();
    }

    if (replacement != null)
    {
        // Look for a request to run on this node
        ThreadPool.QueueUserWorkItem(new WaitCallback(this.FindRequestForNodeThreadFunc), replacement);
    }

    DryadLogger.LogMethodExit();
}
/// <summary>
/// Event handler for Dispatcher.FaultedEvent.
/// The event is raised when a task transitions out of a running state, or when there is
/// a communication error which does not succeed after N retries.
/// </summary>
/// <param name="sender">The dispatcher raising the faulted event</param>
/// <param name="e">Not used</param>
private void OnDispatcherFaulted(object sender, EventArgs e)
{
    Dispatcher faulted = sender as Dispatcher;
    if (faulted == null)
    {
        return;
    }

    DryadLogger.LogWarning("Dispatcher Faulted", "Dispatcher for node '{0}' faulted due to {1}", faulted.NodeName, faulted.SchedulerTaskFailed ? "failed YARN Container" : "communication error");

    lock (dispatcherChangeLock)
    {
        // Either way the dispatcher leaves the good pool
        dispatcherPool.Remove(faulted);

        if (!faulted.SchedulerTaskFailed)
        {
            // Communication error: park the dispatcher in the bad pool and arm a timer
            // so that it is retried later
            badDispatcherPool.Add(faulted);
            faulted.SetRetryTimer(new TimerCallback(this.RetryFaultedDispatcher));
        }
        else
        {
            // The scheduler task left the running state: drop the dispatcher completely
            badDispatcherPool.Remove(faulted);
            faulted.Dispose();
        }
    }

    // Whatever was running on the faulted dispatcher is reported as interrupted
    if (faulted.CurrentProcess != Dispatcher.InvalidProcessId)
    {
        ProcessExit(faulted.CurrentProcess, unchecked((int)Constants.DrError_ProcessingInterrupted), true);
    }
}
/// <summary>
/// When both dispatcher pools are empty, fails every request that is still queued by
/// moving its process to SchedulingFailed, then empties the request pool.
/// </summary>
/// <exception cref="ApplicationException">A queued request has no entry in the process table</exception>
private void CheckForOutOfDispatchers()
{
    if (badDispatcherPool.Count != 0 || dispatcherPool.Count != 0)
    {
        // At least one dispatcher is usable or waiting for a retry
        return;
    }

    DryadLogger.LogError(0, null, "All vertex tasks have failed");

    lock (requestPool.SyncRoot)
    {
        foreach (ScheduleProcessRequest pending in requestPool)
        {
            XComputeProcess pendingProc;
            if (!processTable.TryGetValue(pending.Id, out pendingProc))
            {
                DryadLogger.LogCritical(0, null, "Failed to find process {0} in process table, exiting application.", pending.Id);
                throw new ApplicationException(String.Format("All vertex tasks failed and unable to cancel pending request id {0}", pending.Id));
            }

            DryadLogger.LogInformation("No Valid Dispatchers", "Transitioning process {0} to state {1} because all vertex tasks failed", pending.Id, ProcessState.SchedulingFailed.ToString());
            pendingProc.ChangeState(ProcessState.SchedulingFailed);
        }

        requestPool.Clear();
    }
}
/// <summary>
/// This event handler is called from ISchedulerHelper task monitoring thread in response
/// to an HPC Task state change.
/// The handler works in two phases: under dispatcherChangeLock it looks up the existing
/// dispatcher for the task and decides whether that dispatcher must be faulted and/or a new
/// one added; after releasing the lock it carries out those decisions.
/// </summary>
/// <param name="sender">Not used</param>
/// <param name="e">Information about the task state transition</param>
private void OnVertexChanged(object sender, VertexChangeEventArgs e)
{
Dispatcher oldDispatcher = null;
Dispatcher newDispatcher = null;
// Decisions made under the lock and acted on after it is released
bool addNewDispatcher = false;
bool faultOldDispatcher = false;
lock (dispatcherChangeLock)
{
// Look for an existing dispatcher for this task, first in the good pool and then in the bad pool
bool dispatcherFound = dispatcherPool.GetByTaskId(e.Id, out oldDispatcher);
if (!dispatcherFound)
{
// Check to see if this dispatcher was already faulted due to a communication error
dispatcherFound = badDispatcherPool.GetByTaskId(e.Id, out oldDispatcher);
}
// Task state change
if (e.OldState != e.NewState)
{
// Transitioning to, e.g., queued
if (e.NewState < VertexTaskState.Running)
{
DryadLogger.LogInformation("Vertex Task State Change", "Task {0} transitioned to waiting", e.Id);
// If there is a dispatcher for the task, then the task has previously been running.
// Now it's not, so we need to fault the dispatcher.
if (dispatcherFound)
{
DryadLogger.LogWarning("Vertex Task State Change", "Previously running task {0} transitioned to waiting", e.Id);
faultOldDispatcher = true;
}
}
// Transition to running
else if (e.NewState == VertexTaskState.Running)
{
if (!dispatcherFound)
{
// No dispatcher for task, add a new one
DryadLogger.LogInformation("Vertex Task State Change", "Task {0} transitioned to running", e.Id);
addNewDispatcher = true;
}
else if (String.Compare(e.OldNode, e.NewNode, StringComparison.OrdinalIgnoreCase) != 0)
{
// Dispatcher found, but task is now on a new node
// 1. Make sure old dispatcher is faulted.
// 2. Add a new one for the new node
DryadLogger.LogInformation("Vertex Task State Change", "Running task {0} assigned to new node", e.Id);
faultOldDispatcher = true;
addNewDispatcher = true;
}
else
{
// Dispatcher found, task is on same node
DryadLogger.LogWarning("Vertex Task State Change", "Change notification for running task {0}, but state and node are unchanged in notification", e.Id);
}
}
// Job is exiting, nothing to do
else if (e.NewState == VertexTaskState.Finished)
{
DryadLogger.LogDebug("Vertex Task State Change", "Task {0} transitioned to finished", e.Id);
}
// Failed or Cancelled
else
{
DryadLogger.LogWarning("Vertex Task State Change", "Task {0} transitioned to failed or cancelled", e.Id);
// Fault dispatcher if it isn't already
if (dispatcherFound)
{
faultOldDispatcher = true;
}
}
}
// Node change
else if (String.Compare(e.OldNode, e.NewNode, StringComparison.OrdinalIgnoreCase) != 0)
{
// State is unchanged but the node differs; only acted on when the task is running and a dispatcher exists
if (e.NewState == VertexTaskState.Running)
{
DryadLogger.LogDebug("Vertex Task State Change", "Task {0} moved from node {1} to node {2}", e.Id, e.OldNode, e.NewNode);
if (dispatcherFound)
{
faultOldDispatcher = true;
addNewDispatcher = true;
}
}
}
// Running -> Queued -> Running, e.g.
else if (e.OldRequeueCount < e.NewRequeueCount)
{
DryadLogger.LogDebug("Vertex Task State Change", "Task {0} node {1} state {2} unchanged from previous state: likely missed a state change notification.",
e.Id, e.NewNode, e.NewState.ToString());
// Was task running previously? If so, fault the old dispatcher.
if (dispatcherFound)
{
faultOldDispatcher = true;
}
// Is task running now? If so, create a new dispatcher to re-establish connection.
if (e.NewState == VertexTaskState.Running)
{
addNewDispatcher = true;
}
}
}
// faultOldDispatcher is only ever set in branches where dispatcherFound is true
if (faultOldDispatcher)
{
oldDispatcher.RaiseFaultedEvent(true);
}
if (addNewDispatcher)
{
newDispatcher = AddDispatcher(e.Id, e.NewNode, e.NewState);
if (newDispatcher != null)
{
// Look for new request for node
ThreadPool.QueueUserWorkItem(new WaitCallback(FindRequestForNodeThreadFunc), newDispatcher);
}
else
{
DryadLogger.LogError(0, null, "Failed to add new dispatcher for node {0}", e.NewNode);
}
}
// This check runs after any replacement dispatcher has been added above
if (faultOldDispatcher)
{
// Check to see if we have any dispatchers left. If not, we need to fail
// everything in the request pool.
CheckForOutOfDispatchers();
}
}
/// <summary>
/// Handles the exit of a process: updates its state, performs a close that was deferred by
/// CloseVertexProcess, and then either detaches the faulted dispatcher or releases the
/// dispatcher and queues a search for new work for it.
/// Exceptions thrown while doing so are logged and not propagated.
/// </summary>
/// <param name="processId">Id of the process that exited</param>
/// <param name="exitCode">Exit code to record; only stored when the process moves to Completed</param>
/// <param name="dispatcherFaulted">True when the exit is being reported because the process's dispatcher faulted</param>
private void ProcessExit(int processId, int exitCode, bool dispatcherFaulted)
{
DryadLogger.LogMethodEntry(processId, exitCode, dispatcherFaulted);
try
{
XComputeProcess proc = null;
if (processTable.TryGetValue(processId, out proc))
{
DryadLogger.LogInformation("Process Exit", "found process {0} for vertex {1}.{2}", processId, proc.GraphManagerId, proc.GraphManagerVersion);
// Update process
if (proc.CurrentState < ProcessState.AssignedToNode && dispatcherFaulted)
{
// If we haven't yet reached AssignedToNode and the dispatcher faulted, then scheduling failed
DryadLogger.LogInformation("Process Exit", "Process {0} was in state {1}", processId, proc.CurrentState.ToString());
proc.ChangeState(ProcessState.SchedulingFailed);
}
else if (proc.CurrentState <= ProcessState.Running)
{
// If we're at AssignedToNode or Running, then the process either did really complete
// or the Vertex Service failed to start it - so this is not a scheduling error and the
// exit code has meaning.
DryadLogger.LogInformation("Process Exit", "Process {0} was in state {1}", processId, proc.CurrentState.ToString());
proc.ExitCode = (uint)exitCode;
proc.ChangeState(ProcessState.Completed);
}
else
{
// we've already reached this state previously, and this call should be idempotent
DryadLogger.LogInformation("Process Exit", "Process {0} was already in state {1}", processId, proc.CurrentState.ToString());
DryadLogger.LogMethodExit();
return;
}
if (proc.HandleClosed)
{
// This happens if a close handle comes from the GM
// before we've received notification that the process exited.
// For example, when the GM does:
// - Cancel
// - CloseHandle
// in rapid succession.
DryadLogger.LogDebug("Process Exit", "Delayed close handle for process {0}", processId);
CloseVertexProcess(processId);
}
// Dispatcher bookkeeping is done under the process lock
lock (proc.SyncRoot)
{
if (dispatcherFaulted)
{
// Detach the faulted dispatcher from the process; it is neither released nor given new work here
DryadLogger.LogWarning("Process Exit", "Process exiting due to faulted dispatcher");
proc.Dispatcher = null;
}
else if (proc.Dispatcher != null)
{
// Release dispatcher
DryadLogger.LogInformation("Process Exit", "Releasing dispatcher");
proc.Dispatcher.Release();
// Look for new request for node
ThreadPool.QueueUserWorkItem(new WaitCallback(this.FindRequestForNodeThreadFunc), proc.Dispatcher);
}
}
}
else
{
DryadLogger.LogError(0, null, "Unknown process id {0}", processId);
}
}
catch (Exception e)
{
// Failures are logged and swallowed so that they do not reach the caller
DryadLogger.LogError(0, e, "Failed to transition vertex process {0} to exited gracefully", processId);
}
DryadLogger.LogMethodExit();
}
/// <summary>
/// Completion callback for the asynchronous ScheduleProcess operation. The scheduling
/// result decides what happens next: the process is exited, the dispatcher is faulted,
/// or the process is moved to AssignedToNode. Exceptions are logged and not propagated.
/// </summary>
/// <param name="asyncResult">AsyncState member is the Dispatcher that initiated the operation</param>
private void ScheduleProcessCallback(IAsyncResult asyncResult)
{
    try
    {
        Dispatcher origin = asyncResult.AsyncState as Dispatcher;
        if (origin == null)
        {
            DryadLogger.LogWarning("Schedule Process", "Dispatcher not passed correctly to callback");
            return;
        }

        // Capture the process id before completing the operation
        int scheduledId = origin.CurrentProcess;
        SchedulingResult outcome = origin.EndScheduleProcess(asyncResult);

        if (outcome == SchedulingResult.Failure)
        {
            // Fatal error (Exception or FaultException): the process cannot be scheduled
            DryadLogger.LogWarning("Schedule Process", "Async operation did not complete successfully for process {0} on node {1}", scheduledId, origin.NodeName);
            if (scheduledId != Dispatcher.InvalidProcessId)
            {
                // The process is still Unscheduled, so ProcessExit will ignore the exit code.
                // A nonzero code is passed anyway so the GM is not confused if there is a race.
                ProcessExit(scheduledId, unchecked((int)Constants.DrError_ProcessingInterrupted));
            }
            origin.Release();
        }
        else if (outcome == SchedulingResult.CommunicationError)
        {
            // The node could not be reached. Fault the dispatcher so that subsequent
            // attempts don't try to use it again; faulting also takes care of exiting
            // the current process, so that is not done here.
            DryadLogger.LogWarning("Schedule Process", "Async operation failed due to communication error for process {0} on node {1}", scheduledId, origin.NodeName);
            origin.RaiseFaultedEvent(false);
        }
        else if (outcome == SchedulingResult.Pending)
        {
            // Nothing to do: there was a problem and the operation is being retried
        }
        else if (outcome == SchedulingResult.Success)
        {
            // The process has been scheduled, so it moves to AssignedToNode
            if (scheduledId != Dispatcher.InvalidProcessId)
            {
                DryadLogger.LogInformation("Schedule Process", "Process {0} successfully scheduled on node {1}", scheduledId, origin.NodeName);
                processTable[scheduledId].ChangeState(ProcessState.AssignedToNode);
            }
        }
    }
    catch (Exception e)
    {
        DryadLogger.LogWarning("Schedule Process", "Schedule process callback threw exception: {0}", e.ToString());
    }
}
/// <summary>
/// Attaches a reserved dispatcher to the request's process and begins the asynchronous
/// scheduling operation on that dispatcher.
/// </summary>
/// <param name="request">Request to schedule; its process must be in the process table</param>
/// <param name="dispatcher">Dispatcher that was reserved for the request</param>
/// <returns>True if the asynchronous operation was started; false otherwise</returns>
private bool ScheduleProcess(ScheduleProcessRequest request, Dispatcher dispatcher)
{
    // Record the assignment while holding the table lock and then the process lock
    lock (processTable.SyncRoot)
    {
        lock (this.processTable[request.Id].SyncRoot)
        {
            processTable[request.Id].Dispatcher = dispatcher;
        }
    }

    bool started = dispatcher.ScheduleProcess(replyUri, request, new AsyncCallback(this.ScheduleProcessCallback));
    if (!started)
    {
        DryadLogger.LogWarning("Schedule Process", "Failed to begin asynchronous scheduling of process {0} on node '{1}'", request.Id, dispatcher.NodeName);
        return false;
    }

    DryadLogger.LogInformation("Schedule Process", "Began asynchronous scheduling of process {0} on node '{1}': '{2}'", request.Id, dispatcher.NodeName, request.CommandLine);
    return true;
}
/// <summary>
/// Thread pool work item that looks for a queued request that can run on a dispatcher's
/// node and, if one is found, schedules it on that dispatcher.
/// </summary>
/// <param name="state">The Dispatcher to find work for; any other value is ignored</param>
private void FindRequestForNodeThreadFunc(Object state)
{
    Dispatcher d = state as Dispatcher;
    if (d == null)
    {
        // No dispatcher was supplied, so there is nothing to do.
        return;
    }

    // The previous catch (NullReferenceException) handler only swallowed the exception
    // when d was null. That could never happen, because every use of d was already guarded
    // by a null check and d is never reassigned, so the handler always rethrew. It has
    // been removed; exceptions propagate exactly as before.

    // FindRequestForNode takes a lock on the request pool
    ScheduleProcessRequest req = null;
    if (!FindRequestForNode(d.NodeName, out req))
    {
        return;
    }

    if (!ScheduleProcess(req, d))
    {
        DryadLogger.LogWarning("Schedule Request on Node", "Failed to schedule process {0} on node {1}", req.Id, d.NodeName);
        processTable[req.Id].ChangeState(ProcessState.SchedulingFailed);
        d.Release();
    }
}
#if false
// This thread is not currently used
// (the enclosing "#if false" excludes it from compilation, and the call that would queue it
// in Initialize is commented out).
// Once a minute it appends a <CheckPoint> record to vertex_health.txt containing
// information about the current process and the status of every dispatcher in the pool.
private void VertexMonitorThreadFunc(Object state)
{
do
{
using (System.IO.StreamWriter sw = new System.IO.StreamWriter("vertex_health.txt", true))
{
sw.AutoFlush = true;
// Copy the dispatchers out under the pool lock so that the status checks below run without it
List<Dispatcher> dlist = new List<Dispatcher>();
lock (dispatcherPool.SyncRoot)
{
foreach (Dispatcher d in dispatcherPool)
{
dlist.Add(d);
}
}
Process proc = Process.GetCurrentProcess();
sw.WriteLine("<CheckPoint>");
sw.WriteLine(" <Timestamp>{0}</Timestamp>", DateTime.Now);
sw.WriteLine(" <MainModule>{0}</MainModule>", proc.MainModule);
sw.WriteLine(" <StartTime>{0}</StartTime>", proc.StartTime);
sw.WriteLine(" <VirtualMemorySize64>{0}</VirtualMemorySize64>", proc.VirtualMemorySize64);
sw.WriteLine(" <WorkingSet64>{0}</WorkingSet64>", proc.WorkingSet64);
foreach (Dispatcher d in dlist)
{
sw.WriteLine(" <Dispatcher>");
sw.WriteLine(" <Name>{0}</Name>", d.NodeName);
sw.WriteLine(" <Idle>{0}</Idle>", d.Idle);
sw.WriteLine(" <Faulted>{0}</Faulted>", d.Faulted);
sw.WriteLine(" <ConnectionAttempts>{0}</ConnectionAttempts>", d.ConnectionAttempts);
// Status is only requested from dispatchers that are not faulted
if (!d.Faulted)
{
VertexStatus status = d.CheckStatus();
sw.WriteLine(" <Alive>{0}</Alive>", status.serviceIsAlive);
if (status.serviceIsAlive)
{
sw.WriteLine(" <ProcessCount>{0}</ProcessCount>", status.runningProcessCount);
sw.WriteLine(" <FreePhysMem>{0}</FreePhysMem>", status.freePhysicalMemory);
sw.WriteLine(" <FreeVirtMem>{0}</FreeVirtMem>", status.freeVirtualMemory);
// Free disk space output is disabled; the loop body is intentionally empty
foreach (KeyValuePair<string, ulong> kvp in status.freeDiskSpaces)
{
//sw.WriteLine(" Disk: {0}, Free space = {1}", kvp.Key, kvp.Value);
}
foreach (VertexProcessInfo vpi in status.vps)
{
sw.WriteLine(" <VertexProcess>");
sw.WriteLine(" <CommandLine>{0}</CommandLine>", vpi.commandLine);
sw.WriteLine(" <Id>{0}</Id>", vpi.DryadId);
sw.WriteLine(" <State>{0}</State>", vpi.State);
sw.WriteLine(" </VertexProcess>");
}
}
}
sw.WriteLine(" </Dispatcher>");
}
sw.WriteLine("</CheckPoint>");
}
// Let the GM fault tolerance handle this
if (dispatcherPool.Count == 0)
{
DryadLogger.LogCritical(0, null, "No reachable dispatchers");
}
// Wait one minute before writing the next checkpoint
Thread.Sleep(1000 * 60);
} while (true);
}
#endif
#endregion
#region Factory Methods
// The single VertexScheduler for this process; created on the first call to GetInstance
private static VertexScheduler vertexScheduler = null;
// Serializes creation of the instance
private static Object factoryLock = new Object();

/// <summary>
/// Returns the process-wide VertexScheduler, creating and initializing it on first use.
/// </summary>
/// <returns>The shared VertexScheduler instance</returns>
public static VertexScheduler GetInstance()
{
    // Fast path: the instance already exists, so the lock is not needed
    if (vertexScheduler != null)
    {
        return vertexScheduler;
    }

    lock (factoryLock)
    {
        // Check again under the lock: another thread may have created the instance
        // while this one was waiting
        if (vertexScheduler == null)
        {
            ProcessTable table = new ProcessTable();
            // The instance is stored in the static field before Initialize runs
            vertexScheduler = new VertexScheduler(table);
            vertexScheduler.Initialize();
        }
    }

    return vertexScheduler;
}
#endregion
}
}