/*
Copyright (c) Microsoft Corporation

All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License.  You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0

THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE,
FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.

See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.
*/

//------------------------------------------------------------------------------
//
//      Handles WCF communication for vertex service
//
//------------------------------------------------------------------------------

namespace Microsoft.Research.Dryad
{
    using System;
    using System.Collections.Generic;
    using System.Diagnostics;
    using System.Net.Security;
    using System.ServiceModel;
    using Microsoft.Research.Dryad;

    /// <summary>
    /// Sends callback notifications (state changes, process exit, property updates)
    /// to WCF endpoints, caching one client for the graph manager and one for a
    /// vertex process, and retrying failed calls with a freshly opened client.
    /// </summary>
    class ReplyDispatcher
    {
        // Cached graph manager endpoint and its client. Guarded by syncRoot.
        private static string graphMgrUri = String.Empty;
        private static VertexCallbackServiceClient graphMgrClient;

        // Cached vertex process endpoint and its client. Guarded by syncRoot.
        // NOTE(review): there is a single vertex slot; CreateClient overwrites it
        // without closing the previous client when a different vertex URI arrives.
        // Closing it there could break a call in flight on another thread, so this
        // is left as-is - confirm whether multiple vertex URIs are expected.
        private static string vertexProcUri = String.Empty;
        private static VertexCallbackServiceClient vertexProcClient;

        // Binding shared by every client; assigned once in the static constructor.
        private static NetTcpBinding binding = null;

        // Maximum number of attempts for each notification.
        private static readonly int numRetries = 6;

        // Pause between attempts, in milliseconds (passed to Thread.Sleep).
        private static readonly int retrySleepTime = 1000;

        // Guards the cached URI/client pairs above.
        private static readonly object syncRoot = new object();

        private static bool shuttingDown = false;

        /// <summary>
        /// Flag to notify reply dispatcher that job is shutting down and any communication errors should be ignored
        /// </summary>
        internal static bool ShuttingDown
        {
            get { return shuttingDown; }
            set { shuttingDown = value; }
        }

        /// <summary>
        /// Initialize binding defaults
        /// </summary>
        static ReplyDispatcher()
        {
            using (ISchedulerHelper helper = SchedulerHelperFactory.GetInstance())
            {
                binding = helper.GetVertexServiceBinding();
            }
        }

        /// <summary>
        /// Check URI to see if it is the graph manager by checking for vertex id /1/
        /// </summary>
        /// <param name="uri">URI where vertex is expecting a response</param>
        /// <returns>true if graph manager</returns>
        public static bool IsGraphMrgUri(string uri)
        {
            Uri u = new Uri(uri);

            // Path segments are identifiers, not linguistic text: compare ordinally.
            return u.AbsolutePath.StartsWith("/1/", StringComparison.Ordinal);
        }

        /// <summary>
        /// Creates a new WCF client to service listening at URI and stores it in the
        /// matching cache slot (graph manager or vertex process).
        /// </summary>
        /// <param name="uri">wcf service endpoint</param>
        /// <returns>client to WCF service</returns>
        private static VertexCallbackServiceClient CreateClient(string uri)
        {
            VertexCallbackServiceClient client = new VertexCallbackServiceClient(binding, new EndpointAddress(uri));

            lock (syncRoot)
            {
                //
                // If the graph manager URI is specified, store this client as the GM client, otherwise assume it's a vertex host
                //
                if (IsGraphMrgUri(uri))
                {
                    graphMgrUri = uri;
                    graphMgrClient = client;
                }
                else
                {
                    vertexProcUri = uri;
                    vertexProcClient = client;
                }
            }

            return client;
        }

        /// <summary>
        /// Close an existing client, falling back to Abort if Close fails.
        /// A null client is ignored so that cleanup paths can call this unconditionally.
        /// </summary>
        /// <param name="client">client to dispose; set to null on return</param>
        private static void DisposeClient(ref VertexCallbackServiceClient client)
        {
            if (client == null)
            {
                // Nothing to close. This happens when a reopen attempt failed before
                // the caller decided to give up.
                return;
            }

            try
            {
                client.Close();
            }
            catch (Exception)
            {
                try
                {
                    client.Abort();
                }
                catch (Exception)
                {
                    // If client cannot be aborted, just finish silently.
                }
            }

            client = null;
        }

        /// <summary>
        /// Disposes a client and removes it from the cache if it is the cached
        /// instance, so later calls do not pick up a closed client.
        /// </summary>
        /// <param name="client">client to discard (may be null); set to null on return</param>
        private static void DiscardClient(ref VertexCallbackServiceClient client)
        {
            if (client == null)
            {
                return;
            }

            lock (syncRoot)
            {
                // Only evict the slot that still holds this exact instance; another
                // thread may already have replaced it with a fresh client.
                if (Object.ReferenceEquals(graphMgrClient, client))
                {
                    graphMgrClient = null;
                    graphMgrUri = String.Empty;
                }
                else if (Object.ReferenceEquals(vertexProcClient, client))
                {
                    vertexProcClient = null;
                    vertexProcUri = String.Empty;
                }
            }

            DisposeClient(ref client);
        }

        /// <summary>
        /// Returns client pointing to URI - create if needed
        /// </summary>
        /// <param name="uri">WCF server address</param>
        /// <returns>Client to server listening at URI</returns>
        private static VertexCallbackServiceClient GetClient(string uri)
        {
            // Read each URI/client pair under the same lock CreateClient writes them
            // with, so a URI is never matched against a client from another update.
            lock (syncRoot)
            {
                if (graphMgrUri.Equals(uri, StringComparison.OrdinalIgnoreCase))
                {
                    return graphMgrClient;
                }

                if (vertexProcUri.Equals(uri, StringComparison.OrdinalIgnoreCase))
                {
                    return vertexProcClient;
                }

                return CreateClient(uri);
            }
        }

        /// <summary>
        /// Try to reopen client to WCF service
        /// </summary>
        /// <param name="uri">Address of service</param>
        /// <returns>new client</returns>
        private static VertexCallbackServiceClient ReopenClient(string uri)
        {
            lock (syncRoot)
            {
                //
                // Look up any cached client for this URI without creating one -
                // a client created here would only be thrown away immediately.
                //
                VertexCallbackServiceClient stale = null;
                if (graphMgrUri.Equals(uri, StringComparison.OrdinalIgnoreCase))
                {
                    stale = graphMgrClient;
                }
                else if (vertexProcUri.Equals(uri, StringComparison.OrdinalIgnoreCase))
                {
                    stale = vertexProcClient;
                }

                //
                // If a client exists, dispose it (no-op when there is none)
                //
                DisposeClient(ref stale);

                //
                // Recreate the client; this also replaces the cache entry
                //
                return CreateClient(uri);
            }
        }

        /// <summary>
        /// Helper method to retry opening the client for use with state changes and property comm.
        /// Logs the failure, attempts to reopen the client, then sleeps for retrySleepTime.
        /// </summary>
        /// <param name="replyUri">URI to respond to</param>
        /// <param name="e">Reason for retry</param>
        /// <returns>new client - may be null on failures</returns>
        private static VertexCallbackServiceClient ReopenClientForRetry(string replyUri, Exception e)
        {
            VertexCallbackServiceClient client = null;

            DryadLogger.LogError(0, e);
            try
            {
                client = ReopenClient(replyUri);
            }
            catch (Exception reopenEx)
            {
                DryadLogger.LogError(0, reopenEx, "Unable to reopen client connection");
            }

            //
            // If retrying, sleep briefly
            //
            System.Threading.Thread.Sleep(retrySleepTime);

            return client;
        }

        /// <summary>
        /// Decides whether a failed call should be given up on and treated as success.
        /// </summary>
        /// <param name="replyUri">URI the call was sent to</param>
        /// <param name="vertexProcess">process behind a vertex URI, or null when not applicable</param>
        /// <returns>true if the job is shutting down, or the target is a vertex process that has exited</returns>
        private static bool ShouldAbandonCall(string replyUri, Process vertexProcess)
        {
            // Check the shutdown flag first so the process is not queried needlessly.
            if (shuttingDown)
            {
                return true;
            }

            if (vertexProcess == null)
            {
                return false;
            }

            return !IsGraphMrgUri(replyUri) && vertexProcess.HasExited;
        }

        /// <summary>
        /// Runs a WCF call against the client for replyUri, retrying up to numRetries
        /// times and reopening the client after each failure.
        /// </summary>
        /// <param name="replyUri">service endpoint to call</param>
        /// <param name="call">the WCF call to make on the client</param>
        /// <param name="vertexProcess">process behind a vertex URI, or null; see ShouldAbandonCall</param>
        /// <returns>
        /// true if the call succeeded or was abandoned (shutdown / exited vertex);
        /// false if every attempt failed
        /// </returns>
        private static bool InvokeWithRetry(string replyUri, Action<VertexCallbackServiceClient> call, Process vertexProcess)
        {
            // Deliberately outside the try block: a failure to obtain the initial
            // client propagates to the caller instead of being retried.
            VertexCallbackServiceClient client = GetClient(replyUri);

            for (int attempt = 0; attempt < numRetries; attempt++)
            {
                try
                {
                    //
                    // If client is null (previous reopen failed), try creating it again
                    //
                    if (client == null)
                    {
                        client = CreateClient(replyUri);
                    }

                    call(client);
                    return true;
                }
                catch (Exception e)
                {
                    if (ShouldAbandonCall(replyUri, vertexProcess))
                    {
                        //
                        // Errors are expected here: clean up, don't retry, report success.
                        // client may be null at this point; DiscardClient tolerates that.
                        //
                        DiscardClient(ref client);
                        return true;
                    }

                    //
                    // If call failed, try reopening WCF client and calling again
                    //
                    client = ReopenClientForRetry(replyUri, e);
                }
            }

            //
            // Every attempt failed; each failure was already logged by ReopenClientForRetry
            //
            return false;
        }

        /// <summary>
        /// Notify URI of state change
        /// </summary>
        /// <param name="replyUri">where to send state change notification</param>
        /// <param name="processId">vertex process id</param>
        /// <param name="newState">updated state</param>
        /// <returns>success/failure of state change notification</returns>
        public static bool FireStateChange(string replyUri, int processId, ProcessState newState)
        {
            DryadLogger.LogMethodEntry(replyUri, processId, newState);

            //
            // Try to notify GM of state change up to numRetries times
            //
            bool result = InvokeWithRetry(
                replyUri,
                client => client.FireStateChange(processId, newState),
                null);

            DryadLogger.LogMethodExit(result);
            return result;
        }

        /// <summary>
        /// Notify GM that vertex host process exited
        /// </summary>
        /// <param name="replyUri">GM address</param>
        /// <param name="processId">vertex process id</param>
        /// <param name="exitCode">reason for vertex host exit</param>
        /// <returns>success/failure</returns>
        public static bool ProcessExited(string replyUri, int processId, int exitCode)
        {
            DryadLogger.LogMethodEntry(replyUri, processId, exitCode);

            //
            // Try to notify GM that the process has exited up to numRetries times
            //
            bool result = InvokeWithRetry(
                replyUri,
                client => client.ProcessExited(processId, exitCode),
                null);

            DryadLogger.LogMethodExit(result);
            return result;
        }

        /// <summary>
        /// Attempt to call SetGetPropsComplete on specified WCF service.
        /// </summary>
        /// <param name="replyUri">Service endpoint</param>
        /// <param name="systemProcess">
        /// process checked with HasExited when replyUri is not the graph manager;
        /// a failed call to an exited process is not retried and is reported as success
        /// </param>
        /// <param name="processId">process id forwarded to the service call</param>
        /// <param name="info">process info forwarded to the service call</param>
        /// <param name="propertyLabels">property labels forwarded to the service call</param>
        /// <param name="propertyVersions">property versions forwarded to the service call</param>
        /// <returns>success/failure</returns>
        public static bool SetGetPropsComplete(string replyUri, Process systemProcess, int processId, ProcessInfo info, string[] propertyLabels, ulong[] propertyVersions)
        {
            DryadLogger.LogMethodEntry(replyUri, processId);

            //
            // Try to set/get properties up to numRetries times
            //
            bool result = InvokeWithRetry(
                replyUri,
                client => client.SetGetPropsComplete(processId, info, propertyLabels, propertyVersions),
                systemProcess);

            DryadLogger.LogMethodExit(result);
            return result;
        }
    }
}