/*
Copyright (c) Microsoft Corporation

All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0

THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR
CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.

See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.

*/

namespace Microsoft.Research.Dryad
{
    using System;
    using System.Collections.Generic;
    using System.Collections.Specialized;
    using System.Globalization;
    using System.Threading;
    using System.Diagnostics;
    using Microsoft.Research.Dryad;

    /// <summary>
    /// Schedules vertex processes onto per-node dispatchers. Requests that cannot be placed
    /// immediately are parked in a request pool and picked up when a dispatcher is released,
    /// added, or retried. Also reacts to dispatcher faults and scheduler task state changes.
    /// A single instance is handed out by <see cref="GetInstance"/>.
    /// </summary>
    public class VertexScheduler
    {
        private ProcessTable processTable = null;
        private DispatcherPool dispatcherPool = new DispatcherPool();
        private DispatcherPool badDispatcherPool = new DispatcherPool();
        private RequestPool requestPool = new RequestPool();
        private ISchedulerHelper schedulerHelper = SchedulerHelperFactory.GetInstance();
        private VertexCallbackServiceHost callbackServiceHost;
        private JobStatus jobStatus = null;
        private string baseUri;
        private string replyUri;
        private int JobId = 0;

        // Process id that callers use to refer to the process this scheduler is running in.
        private const int currentProcess = 1;

        // Id of this process as read from the CCP_DRYADPROCID environment variable (0 if absent).
        private int processId = 0;

        // Serializes moves of dispatchers between the good and bad dispatcher pools.
        private readonly object dispatcherChangeLock = new object();

        #region Public Members

        /// <summary>
        /// Cancels a process: a request still waiting in the request pool is simply removed,
        /// otherwise the cancellation is forwarded to the process itself.
        /// </summary>
        /// <param name="processId">Id of the process to cancel.</param>
        public void CancelScheduleProcess(int processId)
        {
            DryadLogger.LogMethodEntry(processId);

            XComputeProcess proc = null;
            if (!processTable.TryGetValue(processId, out proc))
            {
                // We don't know about this process
                DryadLogger.LogWarning("Cancel process", "Attempt to cancel unknown process, id {0}", processId);
                return;
            }

            // Try to remove it from request pool (unassigned)
            if (requestPool.Cancel(processId))
            {
                DryadLogger.LogInformation("Cancel process", "Process request removed from request pool for process id {0}", processId);
                return;
            }

            // Handle already assigned processes
            proc.Cancel();
        }

        /// <summary>
        /// Closes the handle for a process. A completed process is released at its dispatcher and
        /// removed from the process table; a process that has not completed yet is only marked as
        /// closed so that ProcessExit can finish the close later.
        /// </summary>
        /// <param name="processId">Id of the process whose handle is being closed.</param>
        public void CloseVertexProcess(int processId)
        {
            XComputeProcess proc = null;

            if (processId == currentProcess)
            {
                // We don't maintain an entry in the process table for the current process
                return;
            }

            if (processTable.TryGetValue(processId, out proc))
            {
                // If it's already assigned, release it at the node
                lock (proc.SyncRoot)
                {
                    if (proc.Dispatcher != null)
                    {
                        if (proc.CurrentState != ProcessState.Completed)
                        {
                            // This can happen when the GM cancels a process and closes the handle right afterward.
                            // We may not have received the state change from the cancellation yet.
                            // Note that the handle was closed by the GM, but do nothing else to avoid leaking a Dispatcher.
                            // ProcessExit will use this to know whether it also needs to close the handle.
                            DryadLogger.LogDebug("Close vertex process", "Closing handle for process id {0} in state {1} - delaying close until process exit", processId, proc.CurrentState);
                            proc.HandleClosed = true;
                        }
                        else
                        {
                            try
                            {
                                proc.Dispatcher.ReleaseProcess(processId);
                            }
                            finally
                            {
                                // The Graph Manager is done with the process once this method is called,
                                // so remove it from the table even if the release throws.
                                processTable.Remove(processId);
                            }
                        }
                    }
                    else
                    {
                        DryadLogger.LogInformation("Close vertex process", "Dispatcher is null for process id {0} - it was either unscheduled or the dispatcher faulted", processId);
                    }
                }
            }
            else
            {
                DryadLogger.LogError(0, null, "Unknown process id {0}", processId);
            }
        }

        /// <summary>
        /// Registers a new process in the process table and moves it to the Unscheduled state.
        /// </summary>
        /// <param name="processId">Id to register.</param>
        public void CreateVertexProcess(int processId)
        {
            XComputeProcess proc = new XComputeProcess(processId);
            this.processTable.Add(processId, proc);
            proc.ChangeState(ProcessState.Unscheduled);
        }

        /// <summary>Local working path of the process this scheduler is running in.</summary>
        public string CurrentProcessLocalPath
        {
            get { return ProcessPathHelper.ProcessPath(this.processId); }
        }

        /// <summary>Remote (share) path of the process this scheduler is running in.</summary>
        public string CurrentProcessRemotePath
        {
            get { return GetProcessPath(this.processId, null); }
        }

        /// <summary>Returns the node names currently present in the good dispatcher pool.</summary>
        public string[] EnumerateProcessNodes()
        {
            return dispatcherPool.Nodes.ToArray();
        }

        /// <summary>
        /// Returns the node a process is assigned to, the current host name for the current
        /// process, or null when the process id is not in the process table.
        /// </summary>
        /// <param name="processId">Id of the process to look up.</param>
        public string GetAssignedNode(int processId)
        {
            // TODO: Need to fix for local executor if it's supported again
            if (processId == currentProcess)
            {
                return AzureUtils.CurrentHostName;
            }

            // Single lookup: the entry may be removed concurrently by CloseVertexProcess.
            XComputeProcess proc;
            if (this.processTable.TryGetValue(processId, out proc))
            {
                return proc.AssignedNode;
            }

            return null;
        }

        /// <summary>Returns the exit code recorded for a process in the process table.</summary>
        /// <param name="processId">Id of the process to look up.</param>
        public uint GetExitCode(int processId)
        {
            return this.processTable[processId].ExitCode;
        }

        /// <summary>
        /// Builds the share path for a process on its assigned node, optionally extended with a
        /// relative path. Returns null when no node is known for the process.
        /// </summary>
        /// <param name="processId">Id of the process.</param>
        /// <param name="relativePath">Optional path to append; null or empty appends nothing.</param>
        public string GetProcessPath(int processId, string relativePath)
        {
            // TODO: Need to fix for local executor if it's supported again
            string node = GetAssignedNode(processId);
            if (String.IsNullOrEmpty(node))
            {
                return null;
            }

            // The path is machine-readable, so format the ids culture-invariantly.
            string path = String.Format(CultureInfo.InvariantCulture, @"\\{0}\{1}\{2}\{3}\{4}", node, Constants.DscTempShare, Environment.UserName, this.JobId, processId);
            if (!String.IsNullOrEmpty(relativePath))
            {
                path += @"\" + relativePath;
            }
            return path;
        }

        /// <summary>
        /// Returns the state of a process: Running for the current process, the recorded state
        /// for a known process, and Completed for an id that is not in the process table.
        /// </summary>
        /// <param name="processId">Id of the process to look up.</param>
        public ProcessState GetProcessState(int processId)
        {
            if (processId == currentProcess)
            {
                return ProcessState.Running;
            }

            // Single lookup: the entry may be removed concurrently by CloseVertexProcess.
            XComputeProcess proc;
            if (this.processTable.TryGetValue(processId, out proc))
            {
                return proc.CurrentState;
            }

            return ProcessState.Completed;
        }

        /// <summary>True when this process id is 1.</summary>
        public bool IsGraphManager
        {
            get { return (processId == 1); }
        }

        /// <summary>True when this process id is greater than 1.</summary>
        public bool IsVertex
        {
            get { return (processId > 1); }
        }

        /// <summary>True when this process id is 0, i.e. CCP_DRYADPROCID was not set or not numeric.</summary>
        public bool IsVertexRerun
        {
            get { return (processId == 0); }
        }

        /// <summary>Job status object created for this scheduler.</summary>
        public JobStatus JobStatus
        {
            get { return this.jobStatus; }
        }

        /// <summary>Registers a state change listener on a process in the process table.</summary>
        /// <param name="processId">Id of the process to observe.</param>
        /// <param name="timeoutInterval">Timeout passed through to the process listener.</param>
        /// <param name="targetState">State the listener is waiting for.</param>
        /// <param name="handler">Handler to register.</param>
        public void NotifyStateChange(int processId, long timeoutInterval, ProcessState targetState, StateChangeEventHandler handler)
        {
            this.processTable[processId].AddStateChangeListener(targetState, timeoutInterval, handler);
        }

        /// <summary>
        /// Applies a reported state change to a known process. Unknown process ids are ignored.
        /// </summary>
        /// <param name="processId">Id of the process that changed state.</param>
        /// <param name="newState">State the process changed to.</param>
        public void ProcessChangeState(int processId, ProcessState newState)
        {
            XComputeProcess proc = null;
            if (this.processTable.TryGetValue(processId, out proc))
            {
                DryadLogger.LogDebug("Process Change State", "Process {0} changed to state {1}", processId, newState);
                if (newState == ProcessState.Running)
                {
                    // Need to ensure that the process transitions to AssignedToNode before
                    // transitioning to Running, or the GM gets mildly confused
                    ThreadPool.QueueUserWorkItem(new WaitCallback(proc.TransitionToRunning));
                }
                else
                {
                    proc.ChangeState(newState);
                }
            }
        }

        /// <summary>Handles the exit of a process that was not caused by a dispatcher fault.</summary>
        /// <param name="processId">Id of the process that exited.</param>
        /// <param name="exitCode">Exit code reported for the process.</param>
        public void ProcessExit(int processId, int exitCode)
        {
            ProcessExit(processId, exitCode, false);
        }

        /// <summary>
        /// Returns whether a process has been cancelled; false when the id is not in the process table.
        /// </summary>
        /// <param name="processId">Id of the process to look up.</param>
        public bool ProcessCancelled(int processId)
        {
            // Single lookup: the entry may be removed concurrently by CloseVertexProcess.
            XComputeProcess proc;
            if (processTable.TryGetValue(processId, out proc))
            {
                return proc.Cancelled;
            }
            return false;
        }

        /// <summary>
        /// Schedules a process. If a dispatcher can be reserved the request is started on it,
        /// otherwise the request is queued in the request pool while any dispatcher exists.
        /// </summary>
        /// <param name="processId">Id of a process previously added to the process table.</param>
        /// <param name="commandLine">Command line of the process.</param>
        /// <param name="softAffinities">Soft affinities passed through to the request.</param>
        /// <param name="hardAffinity">Node the process must run on, or null.</param>
        /// <param name="environment">Environment for the process; a new dictionary is created when null.</param>
        /// <returns>
        /// True if scheduling was started or the request was queued; false if there are no
        /// dispatchers at all or scheduling could not be started.
        /// </returns>
        // NOTE(review): the generic type argument of the softAffinities parameter appears to be
        // missing in this copy of the file; the declaration is left exactly as found - confirm.
        public bool ScheduleProcess(int processId, string commandLine, List softAffinities, string hardAffinity, StringDictionary environment)
        {
            // Look the process up once and reuse it for the rest of the method.
            XComputeProcess proc = processTable[processId];

            proc.SetIdAndVersion(commandLine);
            DryadLogger.LogInformation("Schedule process", "Internal ID {0} corresponds to vertex {1}.{2}", processId, proc.GraphManagerId, proc.GraphManagerVersion);
            DryadLogger.LogInformation("Schedule process", "Internal ID {0} has a command line of {1}", processId, commandLine);

            if (environment == null)
            {
                environment = new StringDictionary();
            }
            environment[Constants.jobManager] = AzureUtils.CurrentHostName;
            environment["CCP_DRYADPROCID"] = processId.ToString(CultureInfo.InvariantCulture);

            ScheduleProcessRequest req = new ScheduleProcessRequest(processId, commandLine, softAffinities, hardAffinity, environment);
            Dispatcher dispatcher = null;

            // Take the request pool lock in case a ProcessExit comes in after we've looked for a node
            // but before the request has been added to the request pool.
            lock (requestPool.SyncRoot)
            {
                if (!FindNodeForRequest(req, out dispatcher))
                {
                    if (dispatcherPool.Count > 0)
                    {
                        DryadLogger.LogDebug("Schedule Process", "No nodes available, adding process {0} to request pool", processId);
                        requestPool.Add(req);
                        return true;
                    }
                    else
                    {
                        DryadLogger.LogCritical(0, null, "No available dispatchers");
                        return false;
                    }
                }
            }

            // Found a Dispatcher, schedule the request outside of the lock
            bool retVal = ScheduleProcess(req, dispatcher);
            if (!retVal)
            {
                proc.ChangeState(ProcessState.SchedulingFailed);
                dispatcher.Release();
            }
            return retVal;
        }

        /// <summary>
        /// Registers a property listener for the request and forwards the set/get properties
        /// call to the dispatcher the process is assigned to.
        /// </summary>
        /// <returns>
        /// True if the dispatcher accepted the call; false if the process is unknown, neither
        /// <paramref name="infos"/> nor <paramref name="getPropLabel"/> is supplied, the process
        /// has no dispatcher, or the dispatcher call failed.
        /// </returns>
        public bool SetGetProps(int processId, ProcessPropertyInfo[] infos, string blockOnLabel, ulong blockOnVersion, long maxBlockTime, string getPropLabel, bool ProcessStatistics, GetSetPropertyEventHandler handler)
        {
            // Single lookup: the entry may be removed concurrently by CloseVertexProcess.
            XComputeProcess proc;
            if (!this.processTable.TryGetValue(processId, out proc))
            {
                DryadLogger.LogError(0, null, "process id {0} not found in process table", processId);
                return false;
            }

            if (infos != null && infos.Length > 0)
            {
                // Only add for the first property info since we only want to fire completion once per request
                proc.AddPropertyListener(infos[0].propertyLabel, infos[0].propertyVersion, handler);
            }
            else if (!String.IsNullOrEmpty(getPropLabel))
            {
                proc.AddPropertyListener(getPropLabel, 0, handler);
            }
            else
            {
                DryadLogger.LogError(0, null, "infos and getPropLabel both empty");
                return false;
            }

            lock (proc.SyncRoot)
            {
                if (proc.Dispatcher != null &&
                    proc.Dispatcher.SetGetProps(replyUri, processId, infos, blockOnLabel, blockOnVersion, maxBlockTime, getPropLabel, ProcessStatistics))
                {
                    return true;
                }
            }

            // Keep returning error to GM and let its fault-tolerance kick in
            if (dispatcherPool.Count == 0)
            {
                DryadLogger.LogCritical(0, null, "All dispatchers are faulted.");
            }
            return false;
        }

        /// <summary>Forwards completion of a set/get properties request to the process it belongs to.</summary>
        public void SetGetPropsComplete(int processId, ProcessInfo info, string[] propertyLabels, ulong[] propertyVersions)
        {
            XComputeProcess proc = null;
            if (processTable.TryGetValue(processId, out proc))
            {
                proc.SetGetPropsComplete(info, propertyLabels, propertyVersions);
            }
            else
            {
                DryadLogger.LogError(0, null, "process id {0} not found in process table", processId);
            }
        }

        /// <summary>
        /// When called from the GM, shuts down all the vertex services and closes the communication channels.
        /// When called from the vertex host, closes the communication channel to the local vertex service.
        /// </summary>
        /// <param name="ShutdownCode">Code to pass to the vertex services. Currently unused.</param>
        public void Shutdown(uint ShutdownCode)
        {
            DryadLogger.LogMethodEntry(ShutdownCode);

            // If this is the GM, shut down every dispatcher in turn and log how long each call took
            if (IsGraphManager)
            {
                // We no longer need to listen for task state changes
                schedulerHelper.StopTaskMonitorThread();

                lock (dispatcherPool.SyncRoot)
                {
                    foreach (Dispatcher disp in dispatcherPool)
                    {
                        DryadLogger.LogDebug("Shutdown", "Calling Shutdown on dispatcher for node {0}", disp.NodeName);
                        Stopwatch sw = Stopwatch.StartNew();
                        try
                        {
                            disp.Shutdown(0);
                            sw.Stop();
                        }
                        catch (Exception e)
                        {
                            // Best effort: a failure on one node must not stop the others from being shut down
                            sw.Stop();
                            DryadLogger.LogError(0, e, "Exception calling Shutdown on dispatcher for node {0}", disp.NodeName);
                        }
                        DryadLogger.LogDebug("Shutdown", "Dispatcher.Shutdown took {0} ms", sw.ElapsedMilliseconds);
                    }
                }
            }

            // Dispose the SchedulerHelper instance to clean up resources
            schedulerHelper.Dispose();
            schedulerHelper = null;

            // Clean out the dispatcher pool (this also disposes all dispatchers)
            dispatcherPool.Clear();

            // Stop the callback service
            callbackServiceHost.Stop();

            DryadLogger.LogMethodExit();
        }

        /// <summary>
        /// Blocks until a process reaches the target state or the wait times out.
        /// </summary>
        /// <param name="processId">Id of the process to wait on.</param>
        /// <param name="timeoutInterval">Timeout; the wait lasts timeoutInterval / 10 milliseconds.</param>
        /// <param name="targetState">State to wait for.</param>
        /// <returns>True if the wait was signalled; false on timeout or unknown process id.</returns>
        public bool WaitForStateChange(int processId, long timeoutInterval, ProcessState targetState)
        {
            DryadLogger.LogDebug("Wait for state change", "Process id: {0}, targetState: {1}", processId, targetState);

            // Single lookup: the entry may be removed concurrently by CloseVertexProcess.
            XComputeProcess proc;
            if (!this.processTable.TryGetValue(processId, out proc))
            {
                DryadLogger.LogError(0, null, "process id {0} not found in process table", processId);
                return false;
            }

            // NOTE(review): the event is disposed when this method returns but the waiter is not
            // unregistered here - confirm XComputeProcess never signals it after a timeout.
            using (ManualResetEvent waitEvent = new ManualResetEvent(false))
            {
                proc.AddStateChangeWaiter(targetState, waitEvent);

                // NOTE(review): the unit of timeoutInterval is not visible in this file - confirm the / 10 conversion.
                return waitEvent.WaitOne(TimeSpan.FromMilliseconds(timeoutInterval / 10), false);
            }
        }

        #endregion

        #region Private Members

        /// <summary>
        /// Creates the scheduler around an existing process table, reads the job and process ids
        /// from the environment and creates (but does not start) the callback service host.
        /// </summary>
        /// <param name="table">Process table used by this scheduler.</param>
        private VertexScheduler(ProcessTable table)
        {
            this.processTable = table;
            this.jobStatus = new JobStatus(schedulerHelper);

            // These environment variables will not be set when the vertex rerun command is executed
            // Set them to 0 so we can use them later to detect that we're rerunning a vertex outside of an HPC job
            if (!Int32.TryParse(Environment.GetEnvironmentVariable("CCP_JOBID"), out JobId))
            {
                JobId = 0;
            }

            if (!Int32.TryParse(Environment.GetEnvironmentVariable("CCP_DRYADPROCID"), out processId))
            {
                processId = 0;
            }

            // The address is machine-readable, so format the id culture-invariantly.
            this.baseUri = String.Format(CultureInfo.InvariantCulture, Constants.vertexCallbackAddrFormat, AzureUtils.CurrentHostName, processId);
            this.replyUri = this.baseUri + Constants.vertexCallbackServiceName;
            this.callbackServiceHost = new VertexCallbackServiceHost(this);
        }

        /// <summary>
        /// Create a new dispatcher and add to the good dispatcher pool.
        /// </summary>
        /// <param name="taskid">HPC Task Id</param>
        /// <param name="node">Name of node this dispatcher is for</param>
        /// <param name="state">State of task when dispatcher is created (always Running now)</param>
        /// <returns>Dispatcher that was added, or null if a dispatcher already exists in the good pool for specified node</returns>
        private Dispatcher AddDispatcher(int taskid, string node, VertexTaskState state)
        {
            VertexComputeNode cn = new VertexComputeNode();
            cn.instanceId = taskid;
            cn.ComputeNode = node;
            cn.State = state;

            Dispatcher d = new Dispatcher(schedulerHelper, cn);
            d.FaultedEvent += new DispatcherFaultedEventHandler(OnDispatcherFaulted);
            if (!dispatcherPool.Add(d))
            {
                // There's already a dispatcher for this node
                d.Dispose();
                d = null;
            }
            return d;
        }

        /// <summary>
        /// Reserves the dispatcher for a node and picks the best pending request for it: the
        /// first request with a hard affinity for the node, otherwise the runnable request with
        /// the highest affinity weight. The chosen request is removed from the request pool; if
        /// none is found the dispatcher reservation is released again.
        /// </summary>
        /// <param name="node">Node to find work for.</param>
        /// <param name="req">Chosen request, or null.</param>
        /// <returns>True if a request was found and the dispatcher remains reserved for it.</returns>
        private bool FindRequestForNode(string node, out ScheduleProcessRequest req)
        {
            req = null;
            ulong maxAffinity = 0;
            bool result = false;
            Dispatcher dispatcher = null;

            // Timing diagnostics: total time, time blocked on the pool lock, time spent searching
            Stopwatch swTotal = new Stopwatch();
            Stopwatch swSearch = new Stopwatch();
            Stopwatch swBlock = new Stopwatch();
            int requestCount = 0;

            swTotal.Start();
            if (dispatcherPool.TryReserveDispatcher(node, out dispatcher))
            {
                swBlock.Start();
                lock (requestPool.SyncRoot)
                {
                    swBlock.Stop();
                    swSearch.Start();
                    requestCount = requestPool.Count;
                    if (requestCount != 0)
                    {
                        foreach (ScheduleProcessRequest r in requestPool)
                        {
                            // Skip any lingering processes which have been cancelled.
                            XComputeProcess pending;
                            if (processTable.TryGetValue(r.Id, out pending) && pending.Cancelled)
                            {
                                continue;
                            }

                            if (r.MustRunOnNode(node))
                            {
                                // A hard affinity wins outright
                                req = r;
                                DryadLogger.LogDebug("Find Request for Node", "process {0} has hard affinity constraint for node {1}", req.Id, node);
                                break;
                            }
                            else if (r.CanRunOnNode(node))
                            {
                                ulong thisAffinity = r.GetAffinityWeightForNode(node);
                                if (thisAffinity == 0 && req == null)
                                {
                                    // Fallback candidate, replaced by any request with a positive weight
                                    req = r;
                                    DryadLogger.LogDebug("Find Request for Node", "Process {0} has 0 affinity constraint for node {1} but no other process has been selected yet", r.Id, node);
                                }
                                else if (thisAffinity > maxAffinity)
                                {
                                    maxAffinity = thisAffinity;
                                    req = r;
                                    DryadLogger.LogDebug("Find Request for Node", "Process {0} with affinity constraint {1} for node {2} larger than previous max", r.Id, thisAffinity, node);
                                }
                            }
                        }
                    }
                    swSearch.Stop();

                    if (req != null)
                    {
                        requestPool.Remove(req);
                        DryadLogger.LogDebug("Find Request for Node", "Found request {0} for node {1}", req.Id, node);
                        result = true;
                    }
                    else
                    {
                        DryadLogger.LogDebug("Find Request for Node", "Did not find any requests for node {0}", node);
                        dispatcher.Release();
                        result = false;
                    }
                }
            }
            swTotal.Stop();

            DryadLogger.LogInformation("Find Request for Node", "Searching {0} requests. Block {1} ms. Inner search {2} ms. Total elapsed time {3} ms.", requestCount, swBlock.ElapsedMilliseconds, swSearch.ElapsedMilliseconds, swTotal.ElapsedMilliseconds);
            return result;
        }

        /// <summary>
        /// Tries to reserve a dispatcher for a request: the hard affinity node if one is set,
        /// otherwise each soft affinity node in list order, and finally any dispatcher whose
        /// node the request can run on.
        /// </summary>
        /// <param name="req">Request to place.</param>
        /// <param name="dispatcher">Reserved dispatcher, or null.</param>
        /// <returns>True if a dispatcher was reserved.</returns>
        private bool FindNodeForRequest(ScheduleProcessRequest req, out Dispatcher dispatcher)
        {
            dispatcher = null;

            if (req.HardAffinity != null)
            {
                // A hard affinity is all or nothing: never fall back to another node
                return dispatcherPool.TryReserveDispatcher(req.HardAffinity, out dispatcher);
            }

            // First try soft affinity in decreasing order (assumes Soft Affinity list in req is sorted descending by weight)
            // Keep a set of the nodes we've already tried, because Dryad adds each affinity twice
            // once for the node and once for the "pod".
            // Node names are compared ordinally and case-insensitively so the result does not depend on the current culture.
            HashSet<string> attemptedNodes = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
            int count = 0;
            for (int i = 0; i < req.AffinityCount; i++)
            {
                string affinityNode = req.AffinityAt(i).Node;
                if (!attemptedNodes.Add(affinityNode))
                {
                    // Already tried this node
                    continue;
                }
                count++;

                if (dispatcherPool.TryReserveDispatcher(affinityNode, out dispatcher))
                {
                    DryadLogger.LogDebug("Find Node For Request", "process {0} satisfied affinity constraint: node {1}, weight {2}", req.Id, affinityNode, req.AffinityAt(i).Weight);
                    return true;
                }
                DryadLogger.LogDebug("Find Node For Request", "process {0} did not satisfy affinity constraint: node {1}, weight {2}", req.Id, affinityNode, req.AffinityAt(i).Weight);
            }

            // If we get this far and count > 0, then we failed to satisfy the affinity constraints
            // log a message so we can more easily detect this situation
            if (count > 0)
            {
                DryadLogger.LogInformation("Find Node For Request", "process {0} failed to satisfy any of {1} affinity constraints", req.Id, count);
            }

            // Finally try any available node
            lock (dispatcherPool.SyncRoot)
            {
                foreach (Dispatcher d in dispatcherPool)
                {
                    if (req.CanRunOnNode(d.NodeName) && d.Reserve())
                    {
                        dispatcher = d;
                        return true;
                    }
                }
            }

            return false;
        }

        /// <summary>
        /// Completes start-up according to the role of this process: nothing for a vertex rerun,
        /// task monitoring for the graph manager, and a local dispatcher plus process table entry
        /// for a vertex. The callback service is started for the graph manager and for vertices.
        /// </summary>
        private void Initialize()
        {
            if (IsVertexRerun)
            {
                // Vertex rerun command is being executed, don't create any dispatchers
                return;
            }
            else if (IsGraphManager)
            {
                VertexChangeEventHandler evtHandler = new VertexChangeEventHandler(OnVertexChanged);
                schedulerHelper.OnVertexChange += evtHandler;
                schedulerHelper.StartTaskMonitorThread();

                if (!schedulerHelper.WaitForTasksReady())
                {
                    // The graph manager will abort because we will not return any vertex nodes
                    DryadLogger.LogCritical(0, null, "Unable to begin job: too many vertex tasks failed");
                    schedulerHelper.OnVertexChange -= evtHandler;
                    dispatcherPool.Clear();
                    return;
                }

                // TODO: we need to be able to turn this off
                //ThreadPool.QueueUserWorkItem(new WaitCallback(VertexMonitorThreadFunc));
            }
            else // IsVertex
            {
                // On vertex nodes, create a dispatcher for the local vertex service and add an entry to the process table for the local process
                Dispatcher d = AddDispatcher(Int32.Parse(Environment.GetEnvironmentVariable(Constants.taskIdEnvVar)), "localhost", VertexTaskState.Running);
                XComputeProcess proc = new XComputeProcess(processId);
                lock (proc.SyncRoot)
                {
                    proc.Dispatcher = d;
                }
                this.processTable.Add(processId, proc);
            }

            callbackServiceHost.Start(this.baseUri, this.schedulerHelper);
        }

        /// <summary>
        /// When a dispatcher faults due to a communication error (as opposed to a task failure)
        /// it is moved to the bad dispatcher pool and a timer is set to retry the dispatcher
        /// after a predetermined interval. This method is called when that timer fires.
        /// </summary>
        /// <param name="state">The dispatcher to be retried</param>
        private void RetryFaultedDispatcher(object state)
        {
            DryadLogger.LogMethodEntry();

            Dispatcher newDispatcher = null;
            Dispatcher d = state as Dispatcher;
            if (d != null)
            {
                DryadLogger.LogDebug("Retry faulted dispatcher", "Creating new dispatcher for node {0}", d.NodeName);
                lock (dispatcherChangeLock)
                {
                    // Add a new dispatcher for this node
                    newDispatcher = AddDispatcher(d.TaskId, d.NodeName, VertexTaskState.Running);

                    // Get rid of the old dispatcher
                    badDispatcherPool.Remove(d);
                    d.Dispose();
                }

                if (newDispatcher != null)
                {
                    // Look for a request to run on this node
                    ThreadPool.QueueUserWorkItem(new WaitCallback(this.FindRequestForNodeThreadFunc), newDispatcher);
                }
            }
            else
            {
                DryadLogger.LogWarning("Retry faulted dispatcher", "state parameter not a valid dispatcher");
            }

            DryadLogger.LogMethodExit();
        }

        /// <summary>
        /// This is the event handler for the Dispatcher.FaultedEvent event.
        /// The FaultedEvent event is raised when a task transitions out of a running
        /// state, or when there is a communication error which does not succeed after N retries.
        /// </summary>
        /// <param name="sender">The dispatcher raising the faulted event</param>
        /// <param name="e">Not used</param>
        private void OnDispatcherFaulted(object sender, EventArgs e)
        {
            Dispatcher d = sender as Dispatcher;
            if (d != null)
            {
                DryadLogger.LogWarning("Dispatcher Faulted", "Dispatcher for node '{0}' faulted due to {1}", d.NodeName, d.SchedulerTaskFailed ? "failed YARN Container" : "communication error");

                lock (dispatcherChangeLock)
                {
                    // Remove from dispatcher pool
                    dispatcherPool.Remove(d);

                    if (d.SchedulerTaskFailed)
                    {
                        // If we're faulting because the scheduler task transitioned to
                        // a non-running state, then we want to completely remove the dispatcher
                        badDispatcherPool.Remove(d);
                        d.Dispose();
                    }
                    else
                    {
                        // If we're faulting because of a communication error, then we want to
                        // add to bad dispatcher pool so that we'll retry it again
                        badDispatcherPool.Add(d);

                        // Set up a timer to move this dispatcher out of the bad pool in the future
                        d.SetRetryTimer(new TimerCallback(this.RetryFaultedDispatcher));
                    }
                }

                // Whatever was running on the faulted dispatcher is treated as interrupted
                if (d.CurrentProcess != Dispatcher.InvalidProcessId)
                {
                    ProcessExit(d.CurrentProcess, unchecked((int)Constants.DrError_ProcessingInterrupted), true);
                }
            }
        }

        /// <summary>
        /// If both dispatcher pools are empty, fails every request still waiting in the request
        /// pool and clears the pool.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// A pending request refers to a process that is not in the process table.
        /// </exception>
        private void CheckForOutOfDispatchers()
        {
            if (badDispatcherPool.Count == 0 && dispatcherPool.Count == 0)
            {
                DryadLogger.LogError(0, null, "All vertex tasks have failed");
                lock (requestPool.SyncRoot)
                {
                    foreach (ScheduleProcessRequest r in requestPool)
                    {
                        XComputeProcess proc;
                        if (processTable.TryGetValue(r.Id, out proc))
                        {
                            DryadLogger.LogInformation("No Valid Dispatchers", "Transitioning process {0} to state {1} because all vertex tasks failed", r.Id, ProcessState.SchedulingFailed.ToString());
                            proc.ChangeState(ProcessState.SchedulingFailed);
                        }
                        else
                        {
                            DryadLogger.LogCritical(0, null, "Failed to find process {0} in process table, exiting application.", r.Id);
                            throw new ApplicationException(String.Format("All vertex tasks failed and unable to cancel pending request id {0}", r.Id));
                        }
                    }
                    requestPool.Clear();
                }
            }
        }

        /// <summary>
        /// This event handler is called from ISchedulerHelper task monitoring thread in response
        /// to an HPC Task state change.
        /// </summary>
        /// <param name="sender">Not used</param>
        /// <param name="e">Information about the task state transition</param>
        private void OnVertexChanged(object sender, VertexChangeEventArgs e)
        {
            Dispatcher oldDispatcher = null;
            Dispatcher newDispatcher = null;
            bool addNewDispatcher = false;
            bool faultOldDispatcher = false;

            // Decide what to do under the lock; the fault / add actions themselves run after it is released
            lock (dispatcherChangeLock)
            {
                bool dispatcherFound = dispatcherPool.GetByTaskId(e.Id, out oldDispatcher);
                if (!dispatcherFound)
                {
                    // Check to see if this dispatcher was already faulted due to a communication error
                    dispatcherFound = badDispatcherPool.GetByTaskId(e.Id, out oldDispatcher);
                }

                // Task state change
                if (e.OldState != e.NewState)
                {
                    // Transitioning to, e.g., queued
                    if (e.NewState < VertexTaskState.Running)
                    {
                        DryadLogger.LogInformation("Vertex Task State Change", "Task {0} transitioned to waiting", e.Id);

                        // If there is a dispatcher for the task, then the task has previously been running.
                        // Now it's not, so we need to fault the dispatcher.
                        if (dispatcherFound)
                        {
                            DryadLogger.LogWarning("Vertex Task State Change", "Previously running task {0} transitioned to waiting", e.Id);
                            faultOldDispatcher = true;
                        }
                    }
                    // Transition to running
                    else if (e.NewState == VertexTaskState.Running)
                    {
                        if (!dispatcherFound)
                        {
                            // No dispatcher for task, add a new one
                            DryadLogger.LogInformation("Vertex Task State Change", "Task {0} transitioned to running", e.Id);
                            addNewDispatcher = true;
                        }
                        else if (String.Compare(e.OldNode, e.NewNode, StringComparison.OrdinalIgnoreCase) != 0)
                        {
                            // Dispatcher found, but task is now on a new node
                            // 1. Make sure old dispatcher is faulted.
                            // 2. Add a new one for the new node
                            DryadLogger.LogInformation("Vertex Task State Change", "Running task {0} assigned to new node", e.Id);
                            faultOldDispatcher = true;
                            addNewDispatcher = true;
                        }
                        else
                        {
                            // Dispatcher found, task is on same node
                            DryadLogger.LogWarning("Vertex Task State Change", "Change notification for running task {0}, but state and node are unchanged in notification", e.Id);
                        }
                    }
                    // Job is exiting, nothing to do
                    else if (e.NewState == VertexTaskState.Finished)
                    {
                        DryadLogger.LogDebug("Vertex Task State Change", "Task {0} transitioned to finished", e.Id);
                    }
                    // Failed or Cancelled
                    else
                    {
                        DryadLogger.LogWarning("Vertex Task State Change", "Task {0} transitioned to failed or cancelled", e.Id);

                        // Fault dispatcher if it isn't already
                        if (dispatcherFound)
                        {
                            faultOldDispatcher = true;
                        }
                    }
                }
                // Node change
                else if (String.Compare(e.OldNode, e.NewNode, StringComparison.OrdinalIgnoreCase) != 0)
                {
                    if (e.NewState == VertexTaskState.Running)
                    {
                        DryadLogger.LogDebug("Vertex Task State Change", "Task {0} moved from node {1} to node {2}", e.Id, e.OldNode, e.NewNode);
                        if (dispatcherFound)
                        {
                            faultOldDispatcher = true;
                            addNewDispatcher = true;
                        }
                    }
                }
                // Running -> Queued -> Running, e.g.
                else if (e.OldRequeueCount < e.NewRequeueCount)
                {
                    DryadLogger.LogDebug("Vertex Task State Change", "Task {0} node {1} state {2} unchanged from previous state: likely missed a state change notification.", e.Id, e.NewNode, e.NewState.ToString());

                    // Was task running previously? If so, fault the old dispatcher.
                    if (dispatcherFound)
                    {
                        faultOldDispatcher = true;
                    }

                    // Is task running now? If so, create a new dispatcher to re-establish connection.
                    if (e.NewState == VertexTaskState.Running)
                    {
                        addNewDispatcher = true;
                    }
                }
            }

            // faultOldDispatcher is only ever set on paths where a dispatcher was found
            if (faultOldDispatcher)
            {
                oldDispatcher.RaiseFaultedEvent(true);
            }

            if (addNewDispatcher)
            {
                newDispatcher = AddDispatcher(e.Id, e.NewNode, e.NewState);
                if (newDispatcher != null)
                {
                    // Look for new request for node
                    ThreadPool.QueueUserWorkItem(new WaitCallback(FindRequestForNodeThreadFunc), newDispatcher);
                }
                else
                {
                    DryadLogger.LogError(0, null, "Failed to add new dispatcher for node {0}", e.NewNode);
                }
            }

            if (faultOldDispatcher)
            {
                // Check to see if we have any dispatchers left. If not, we need to fail
                // everything in the request pool.
                CheckForOutOfDispatchers();
            }
        }

        /// <summary>
        /// Records the exit of a process, completes a delayed close handle if one is pending and
        /// frees the dispatcher the process was using. Exceptions are logged and not propagated.
        /// </summary>
        /// <param name="processId">Id of the process that exited.</param>
        /// <param name="exitCode">Exit code reported for the process.</param>
        /// <param name="dispatcherFaulted">True when the exit is caused by a faulted dispatcher.</param>
        private void ProcessExit(int processId, int exitCode, bool dispatcherFaulted)
        {
            DryadLogger.LogMethodEntry(processId, exitCode, dispatcherFaulted);

            try
            {
                XComputeProcess proc = null;
                if (processTable.TryGetValue(processId, out proc))
                {
                    DryadLogger.LogInformation("Process Exit", "found process {0} for vertex {1}.{2}", processId, proc.GraphManagerId, proc.GraphManagerVersion);

                    // Update process
                    if (proc.CurrentState < ProcessState.AssignedToNode && dispatcherFaulted)
                    {
                        // If we haven't yet reached AssignedToNode and the dispatcher faulted, then scheduling failed
                        DryadLogger.LogInformation("Process Exit", "Process {0} was in state {1}", processId, proc.CurrentState.ToString());
                        proc.ChangeState(ProcessState.SchedulingFailed);
                    }
                    else if (proc.CurrentState <= ProcessState.Running)
                    {
                        // If we're at AssignedToNode or Running, then the process either did really complete
                        // or the Vertex Service failed to start it - so this is not a scheduling error and the
                        // exit code has meaning.
                        DryadLogger.LogInformation("Process Exit", "Process {0} was in state {1}", processId, proc.CurrentState.ToString());
                        proc.ExitCode = (uint)exitCode;
                        proc.ChangeState(ProcessState.Completed);
                    }
                    else
                    {
                        // we've already reached this state previously, and this call should be idempotent
                        DryadLogger.LogInformation("Process Exit", "Process {0} was already in state {1}", processId, proc.CurrentState.ToString());
                        DryadLogger.LogMethodExit();
                        return;
                    }

                    if (proc.HandleClosed)
                    {
                        // This happens if a close handle comes from the GM
                        // before we've received notification that the process exited.
                        // For example, when the GM does:
                        // - Cancel
                        // - CloseHandle
                        // in rapid succession.
                        DryadLogger.LogDebug("Process Exit", "Delayed close handle for process {0}", processId);
                        CloseVertexProcess(processId);
                    }

                    lock (proc.SyncRoot)
                    {
                        if (dispatcherFaulted)
                        {
                            DryadLogger.LogWarning("Process Exit", "Process exiting due to faulted dispatcher");
                            proc.Dispatcher = null;
                        }
                        else if (proc.Dispatcher != null)
                        {
                            // Release dispatcher
                            DryadLogger.LogInformation("Process Exit", "Releasing dispatcher");
                            proc.Dispatcher.Release();

                            // Look for new request for node
                            ThreadPool.QueueUserWorkItem(new WaitCallback(this.FindRequestForNodeThreadFunc), proc.Dispatcher);
                        }
                    }
                }
                else
                {
                    DryadLogger.LogError(0, null, "Unknown process id {0}", processId);
                }
            }
            catch (Exception e)
            {
                DryadLogger.LogError(0, e, "Failed to transition vertex process {0} to exited gracefully", processId);
            }

            DryadLogger.LogMethodExit();
        }

        /// <summary>
        /// This is the callback method for the async ScheduleProcess operation.
        /// </summary>
        /// <param name="asyncResult">AsyncState member is the Dispatcher that initiated the operation</param>
        private void ScheduleProcessCallback(IAsyncResult asyncResult)
        {
            try
            {
                Dispatcher d = asyncResult.AsyncState as Dispatcher;
                if (d != null)
                {
                    // Capture the process id before ending the operation
                    int currentProcessId = d.CurrentProcess;

                    SchedulingResult schedulingResult = d.EndScheduleProcess(asyncResult);
                    if (schedulingResult == SchedulingResult.Failure)
                    {
                        // This indicates there was a fatal error (Exception or FaultException)
                        DryadLogger.LogWarning("Schedule Process", "Async operation did not complete successfully for process {0} on node {1}", currentProcessId, d.NodeName);

                        if (currentProcessId != Dispatcher.InvalidProcessId)
                        {
                            // Pass a nonzero exit code so the GM is not confused in case of a race condition.
                            // NOTE(review): ProcessExit is called here with dispatcherFaulted == false, so for a
                            // process that has not passed Running it records this exit code, moves the process
                            // to Completed and releases its dispatcher. An earlier comment here said the code is
                            // ignored; confirm the intended state and whether the d.Release() below is a second release.
                            ProcessExit(currentProcessId, unchecked((int)Constants.DrError_ProcessingInterrupted));
                        }
                        d.Release();
                    }
                    else if (schedulingResult == SchedulingResult.CommunicationError)
                    {
                        // This indicates that there was an error communicating with the node.
                        // We need to fault the dispatcher so that subsequent attempts don't try to use it again.
                        // Faulting the dispatcher will take care of exiting the current process, so no need to
                        // do it here.
                        DryadLogger.LogWarning("Schedule Process", "Async operation failed due to communication error for process {0} on node {1}", currentProcessId, d.NodeName);
                        d.RaiseFaultedEvent(false);
                    }
                    else if (schedulingResult == SchedulingResult.Pending)
                    {
                        // Nothing to do for this case - it indicates there was a problem and we're retrying
                    }
                    else if (schedulingResult == SchedulingResult.Success)
                    {
                        // Process has been scheduled, transition to AssignedToNode state
                        if (currentProcessId != Dispatcher.InvalidProcessId)
                        {
                            DryadLogger.LogInformation("Schedule Process", "Process {0} successfully scheduled on node {1}", currentProcessId, d.NodeName);
                            processTable[currentProcessId].ChangeState(ProcessState.AssignedToNode);
                        }
                    }
                }
                else
                {
                    DryadLogger.LogWarning("Schedule Process", "Dispatcher not passed correctly to callback");
                }
            }
            catch (Exception e)
            {
                // Log and continue: the exception is not propagated out of the callback
                DryadLogger.LogWarning("Schedule Process", "Schedule process callback threw exception: {0}", e.ToString());
            }
        }

        /// <summary>
        /// Assigns a reserved dispatcher to the request's process and starts asynchronous
        /// scheduling on it.
        /// </summary>
        /// <param name="request">Request to schedule.</param>
        /// <param name="dispatcher">Dispatcher already reserved for the request.</param>
        /// <returns>True if the asynchronous operation was started.</returns>
        private bool ScheduleProcess(ScheduleProcessRequest request, Dispatcher dispatcher)
        {
            lock (processTable.SyncRoot)
            {
                // Look the process up once, while holding the table lock
                XComputeProcess proc = this.processTable[request.Id];
                lock (proc.SyncRoot)
                {
                    proc.Dispatcher = dispatcher;
                }
            }

            if (dispatcher.ScheduleProcess(replyUri, request, new AsyncCallback(this.ScheduleProcessCallback)))
            {
                DryadLogger.LogInformation("Schedule Process", "Began asynchronous scheduling of process {0} on node '{1}': '{2}'", request.Id, dispatcher.NodeName, request.CommandLine);
                return true;
            }
            else
            {
                DryadLogger.LogWarning("Schedule Process", "Failed to begin asynchronous scheduling of process {0} on node '{1}'", request.Id, dispatcher.NodeName);
                return false;
            }
        }

        /// <summary>
        /// Thread pool work item that looks for a pending request for a dispatcher's node and
        /// schedules it on that dispatcher.
        /// </summary>
        /// <param name="state">The dispatcher to find work for; anything else is ignored.</param>
        private void FindRequestForNodeThreadFunc(Object state)
        {
            Dispatcher d = state as Dispatcher;
            if (d == null)
            {
                return;
            }

            // FindRequestForNode takes a lock on the request pool
            ScheduleProcessRequest req = null;
            if (FindRequestForNode(d.NodeName, out req))
            {
                if (!ScheduleProcess(req, d))
                {
                    DryadLogger.LogWarning("Schedule Request on Node", "Failed to schedule process {0} on node {1}", req.Id, d.NodeName);
                    processTable[req.Id].ChangeState(ProcessState.SchedulingFailed);
                    d.Release();
                }
            }
        }

#if false
        // This thread is not currently used
        // NOTE(review): this disabled block appears to have lost generic type arguments and the
        // content of several string literals in this copy of the file; restore it from source
        // control history before re-enabling.
        private void VertexMonitorThreadFunc(Object state)
        {
            do
            {
                using (System.IO.StreamWriter sw = new System.IO.StreamWriter("vertex_health.txt", true))
                {
                    sw.AutoFlush = true;
                    List dlist = new List();
                    lock (dispatcherPool.SyncRoot)
                    {
                        foreach (Dispatcher d in dispatcherPool)
                        {
                            dlist.Add(d);
                        }
                    }

                    Process proc = Process.GetCurrentProcess();
                    sw.WriteLine("");
                    sw.WriteLine(" {0}", DateTime.Now);
                    sw.WriteLine(" {0}", proc.MainModule);
                    sw.WriteLine(" {0}", proc.StartTime);
                    sw.WriteLine(" {0}", proc.VirtualMemorySize64);
                    sw.WriteLine(" {0}", proc.WorkingSet64);

                    foreach (Dispatcher d in dlist)
                    {
                        sw.WriteLine(" ");
                        sw.WriteLine(" {0}", d.NodeName);
                        sw.WriteLine(" {0}", d.Idle);
                        sw.WriteLine(" {0}", d.Faulted);
                        sw.WriteLine(" {0}", d.ConnectionAttempts);
                        if (!d.Faulted)
                        {
                            VertexStatus status = d.CheckStatus();
                            sw.WriteLine(" {0}", status.serviceIsAlive);
                            if (status.serviceIsAlive)
                            {
                                sw.WriteLine(" {0}", status.runningProcessCount);
                                sw.WriteLine(" {0}", status.freePhysicalMemory);
                                sw.WriteLine(" {0}", status.freeVirtualMemory);
                                foreach (KeyValuePair kvp in status.freeDiskSpaces)
                                {
                                    //sw.WriteLine(" Disk: {0}, Free space = {1}", kvp.Key, kvp.Value);
                                }
                                foreach (VertexProcessInfo vpi in status.vps)
                                {
                                    sw.WriteLine(" ");
                                    sw.WriteLine(" {0}", vpi.commandLine);
                                    sw.WriteLine(" {0}", vpi.DryadId);
                                    sw.WriteLine(" {0}", vpi.State);
                                    sw.WriteLine(" ");
                                }
                            }
                        }
                        sw.WriteLine(" ");
                    }
                    sw.WriteLine("");
                }

                // Let the GM fault tolerance handle this
                if (dispatcherPool.Count == 0)
                {
                    DryadLogger.LogCritical(0, null, "No reachable dispatchers");
                }

                Thread.Sleep(1000 * 60);

            } while (true);
        }
#endif

        #endregion

        #region Factory Methods

        private static VertexScheduler vertexScheduler = null;
        private static readonly Object factoryLock = new Object();

        /// <summary>
        /// Returns the process-wide scheduler, creating and initializing it on first use.
        /// </summary>
        // NOTE(review): the static field is assigned before Initialize() runs and the outer null
        // check is made without the lock, so a concurrent caller can receive an instance whose
        // initialization has not finished. Publishing only after Initialize() would close that
        // window, but is safe only if nothing reached from Initialize() calls GetInstance() -
        // confirm before changing.
        public static VertexScheduler GetInstance()
        {
            if (vertexScheduler == null)
            {
                lock (factoryLock)
                {
                    if (vertexScheduler == null)
                    {
                        ProcessTable processTable = new ProcessTable();
                        vertexScheduler = new VertexScheduler(processTable);
                        vertexScheduler.Initialize();
                    }
                }
            }

            return vertexScheduler;
        }

        #endregion
    }
}