/* Copyright (c) Microsoft Corporation All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. See the Apache Version 2.0 License for specific language governing permissions and limitations under the License. */ #include #include #include using namespace System::IO; using namespace Microsoft::Research::Peloponnese::Azure; using namespace Microsoft::Research::Peloponnese::Hdfs; using namespace msclr; // // Prints a timestamp as MM/DD/YYYY HH:MM:SS.MS // static System::String^ RecordTimeInternal() { DYNAMIC_TIME_ZONE_INFORMATION zone; SYSTEMTIME local; int firstTimeZone = GetDynamicTimeZoneInformation(&zone); GetLocalTime(&local); int timeZone = GetDynamicTimeZoneInformation(&zone); if (timeZone != firstTimeZone) { // yes, this is checking for the race that daylight savings time started or ended between the two calls. Sorry GetLocalTime(&local); } int bias; if (timeZone == TIME_ZONE_ID_STANDARD) { bias = -(zone.Bias + zone.StandardBias); } else if (timeZone == TIME_ZONE_ID_DAYLIGHT) { bias = -(zone.Bias + zone.DaylightBias); } int hourBias = bias / 60; int minuteBias = abs(bias) % 60; return System::String::Format("logtimelocal={0,2:D2}/{1,2:D2}/{2,4:D4} {3,2:D2}:{4,2:D2}:{5,2:D2}.{6,3:D3}UTC{7}{8}:{9,2:D2}", local.wMonth, local.wDay, local.wYear, local.wHour, local.wMinute, local.wSecond, local.wMilliseconds, (hourBias >= 0) ? "+" : "", hourBias, minuteBias); } // // Prints a timestamp as MM/DD/YYYY HH:MM:SS.MS // static void WriteTimestamp(TextWriter^ writer, System::String^ key, DrDateTime timestamp) { union { FILETIME ft; DrDateTime ts; }; ts = timestamp; SYSTEMTIME utc; GetSystemTimeAsFileTime(&ft); FileTimeToSystemTime(&ft, &utc); writer->Write(",{0}={1,2:D2}/{2,2:D2}/{3,4:D4} {4,2:D2}:{5,2:D2}:{6,2:D2}.{7,3:D3}", key, utc.wMonth, utc.wDay, utc.wYear, utc.wHour, utc.wMinute, utc.wSecond, utc.wMilliseconds); } static void WriteKeyValue(TextWriter^ writer, System::String^ key, System::String^ value) { value = value->Replace('"', '\''); if (value->IndexOf(',') >= 0 || value->IndexOf('\n') >= 0) { writer->Write(",{0}=\"{1}\"", key, value); } else { writer->Write(",{0}={1}", key, value); } } ref class PeloponneseLogger : public Microsoft::Research::Peloponnese::ILogger { public: virtual void Log(System::String^ message, System::String^ file, System::String^ function, int line) { if (DrLogging::Enabled(DrLog_Info)) { DrString sMessage(message); DrString sFile(file); DrString sFunction(function); DrLogHelper(DrLog_Info, sFile.GetChars(), sFunction.GetChars(), line)("%s", sMessage.GetChars()); } } virtual void Stop() { } }; DrCalypsoReporter::DrCalypsoReporter(DrNativeString uriString) { System::Uri^ uri = DrNew System::Uri(uriString); if (uri->Scheme == Utils::BlobScheme) { m_logStream = DrNew AzureLogAppendStream(uri, 0x20, false, false, gcnew PeloponneseLogger()); m_flushInterval = 1000; } else if (uri->Scheme == "hdfs") { m_logStream = DrNew HdfsLogAppendStream(uri, false); // flushing hdfs logs is very expensive m_flushInterval = 2 * 60 * 1000; } else if (uri->Scheme == "file") { m_logStream = DrNew FileStream(uri->UnescapeDataString(uri->AbsolutePath), FileMode::Create, FileAccess::Write); m_flushInterval = 1000; } // use a big write buffer so we don't write tons of tiny pages m_writer = DrNew StreamWriter(m_logStream, System::Text::Encoding::UTF8, 4 * 1024 * 1024); m_stop = DrNew System::Threading::Tasks::TaskCompletionSource(); m_flusher = DrNew System::Threading::Tasks::Task(DrNew System::Action(this, &DrCalypsoReporter::Flusher)); m_flusher->Start(); } DrCalypsoReporter::DrCalypsoReporter() { m_logStream = DrNull; m_flushInterval = 1000; m_writer = System::Console::Out; m_stop = DrNew System::Threading::Tasks::TaskCompletionSource(); m_flusher = DrNew System::Threading::Tasks::Task(DrNew System::Action(this, &DrCalypsoReporter::Flusher)); m_flusher->Start(); } void DrCalypsoReporter::Flusher() { try { while (true) { bool stopped = m_stop->Task->Wait(m_flushInterval); if (stopped) { return; } { lock l(this); if (m_writer == DrNull) { // discarded return; } m_writer->Flush(); if (m_logStream != DrNull) { m_logStream->Flush(); } } } } catch (System::Exception^ e) { DrString reason(e->ToString()); DrLogE("Calypso flusher got exception: %s", reason.GetChars()); } } System::String^ DrCalypsoReporter::RecordTime() { return RecordTimeInternal(); } void DrCalypsoReporter::BeginEntry(System::String^ entryTime, TextWriter^ writer) { writer->Write(entryTime); } void DrCalypsoReporter::Discard() { m_stop->TrySetResult(true); m_flusher->Wait(); { lock l(this); if (m_logStream != DrNull) { try { delete m_writer; delete m_logStream; } catch (System::Exception^ e) { DrString reason(e->ToString()); DrLogE("Calypso discard got exception: %s", reason.GetChars()); } m_logStream = DrNull; } m_writer = DrNull; } } void DrCalypsoReporter::ReportStart(DrDateTime startTime) { try { lock l(this); if (m_writer == DrNull) { // discarded return; } System::String^ entryTime = RecordTime(); TextWriter^ writer = gcnew StringWriter(); BeginEntry(entryTime, writer); WriteKeyValue(writer, "job", "start"); WriteTimestamp(writer, "utctime", startTime); writer->WriteLine(""); m_writer->Write(writer->ToString()); } catch (System::Exception^ e) { DrString reason(e->ToString()); DrLogE("Calypso got exception: %s", reason.GetChars()); Discard(); } } void DrCalypsoReporter::ReportStop(UINT exitCode, DrNativeString errorString, DrDateTime stopTime) { try { lock l(this); if (m_writer == DrNull) { // discarded return; } System::String^ entryTime = RecordTime(); TextWriter^ writer = gcnew StringWriter(); BeginEntry(entryTime, writer); WriteKeyValue(writer, "job", "stop"); WriteKeyValue(writer, "exitcode", exitCode.ToString("X8")); if (errorString != DrNull) { WriteKeyValue(writer, "errorstring", errorString); } WriteTimestamp(writer, "utctime", stopTime); writer->WriteLine(""); m_writer->Write(writer->ToString()); } catch (System::Exception^ e) { DrString reason(e->ToString()); DrLogE("Calypso got exception: %s", reason.GetChars()); } Discard(); } static void WriteError(TextWriter^ writer, DrErrorPtr status) { WriteKeyValue(writer, "statuscode", status->m_code.ToString("X8")); WriteKeyValue(writer, "statuscomponent", status->m_component.GetString()); WriteKeyValue(writer, "statusdescription", status->m_explanation.GetString()); } void DrCalypsoReporter::ReceiveMessage(DrProcessInfoRef info) { DrDateTime processCreated = DrDateTime_Never; DrDateTime processScheduled = DrDateTime_Never; DrString processName; { DrLockBoxKey process(info->m_process); processName = process->GetName(); processCreated = process->GetInfo()->m_jmProcessCreatedTime; processScheduled = process->GetInfo()->m_jmProcessScheduledTime; } try { lock l(this); if (m_writer == DrNull) { // discarded return; } System::String^ entryTime = RecordTime(); TextWriter^ writer = gcnew StringWriter(); switch (info->m_state->m_state) { case DPS_Starting: BeginEntry(entryTime, writer); WriteKeyValue(writer, "process", info->m_state->m_process->GetHandleIdAsString().GetString()); WriteKeyValue(writer, "transition", "starting"); WriteKeyValue(writer, "machine", info->m_state->m_process->GetAssignedNode()->GetLocality().GetString()); WriteKeyValue(writer, "id", info->m_state->m_process->GetDirectory().GetString()); WriteKeyValue(writer, "name", processName.GetString()); WriteTimestamp(writer, "utctime", info->m_state->m_creatingTime); writer->WriteLine(""); break; case DPS_Created: BeginEntry(entryTime, writer); WriteKeyValue(writer, "process", info->m_state->m_process->GetHandleIdAsString().GetString()); WriteKeyValue(writer, "transition", "created"); WriteTimestamp(writer, "utctime", info->m_state->m_createdTime); writer->WriteLine(""); break; case DPS_Running: BeginEntry(entryTime, writer); WriteKeyValue(writer, "process", info->m_state->m_process->GetHandleIdAsString().GetString()); WriteKeyValue(writer, "transition", "running"); WriteTimestamp(writer, "utctime", info->m_state->m_beginExecutionTime); writer->WriteLine(""); break; case DPS_Completed: BeginEntry(entryTime, writer); WriteKeyValue(writer, "process", info->m_state->m_process->GetHandleIdAsString().GetString()); WriteKeyValue(writer, "transition", "completed"); WriteTimestamp(writer, "utctime", info->m_state->m_terminatedTime); writer->WriteLine(""); break; case DPS_Failed: BeginEntry(entryTime, writer); WriteKeyValue(writer, "process", info->m_state->m_process->GetHandleIdAsString().GetString()); WriteKeyValue(writer, "transition", "failed"); WriteKeyValue(writer, "exitcode", info->m_state->m_exitCode.ToString()); WriteError(writer, info->m_state->m_status); WriteTimestamp(writer, "utctime", info->m_state->m_terminatedTime); writer->WriteLine(""); break; default: break; } m_writer->Write(writer->ToString()); } catch (System::Exception^ e) { DrString reason(e->ToString()); DrLogE("Calypso got exception: %s", reason.GetChars()); Discard(); } } void DrCalypsoReporter::ReceiveMessage(DrVertexInfoRef info) { System::String^ vertexId = info->m_info->GetProcessStatus()->GetVertexId().ToString(); System::String^ vertexVersion = info->m_info->GetProcessStatus()->GetVertexInstanceVersion().ToString(); System::String^ processGuid = "(no guid)"; System::String^ machineName = "(no computer)"; System::String^ idOnComputer = "(no id)"; if (info->m_process.IsEmpty() == false) { DrLockBoxKey process(info->m_process); DrProcessHandlePtr handle = process->GetInfo()->m_state->m_process; if (handle != DrNull) { idOnComputer = handle->GetAssignedNode()->GetName().GetString() + ":" + handle->GetDirectory().GetString(); processGuid = handle->GetHandleIdAsString().GetString(); if (handle->GetAssignedNode() != DrNull) { machineName = handle->GetAssignedNode()->GetLocality().GetString(); } } } try { lock l(this); if (m_writer == DrNull) { // discarded return; } System::String^ entryTime = RecordTime(); TextWriter^ writer = gcnew StringWriter(); DrVertexProcessStatusPtr status = DrNull; if (info->m_info != DrNull) { status = info->m_info->GetProcessStatus(); } switch (info->m_state) { case DVS_NotStarted: BeginEntry(entryTime, writer); WriteKeyValue(writer, "vertex", vertexId); WriteKeyValue(writer, "version", vertexVersion); WriteKeyValue(writer, "name", info->m_name.GetString()); if (info->m_partInStage >= 0) { WriteKeyValue(writer, "stagename", info->m_stageName.GetString()); WriteKeyValue(writer, "partInStage", info->m_partInStage.ToString()); } WriteKeyValue(writer, "transition", "created"); WriteTimestamp(writer, "utctime", info->m_statistics->m_creationTime); writer->WriteLine(""); break; case DVS_Starting: BeginEntry(entryTime, writer); WriteKeyValue(writer, "vertex", vertexId); WriteKeyValue(writer, "version", vertexVersion); WriteKeyValue(writer, "transition", "starting"); WriteTimestamp(writer, "utctime", info->m_statistics->m_startTime); writer->WriteLine(""); break; case DVS_Running: BeginEntry(entryTime, writer); WriteKeyValue(writer, "vertex", vertexId); WriteKeyValue(writer, "version", vertexVersion); WriteKeyValue(writer, "transition", "running"); WriteKeyValue(writer, "process", processGuid); WriteKeyValue(writer, "id", idOnComputer); WriteKeyValue(writer, "computer", machineName); WriteTimestamp(writer, "utctime", info->m_statistics->m_runningTime); writer->WriteLine(""); break; case DVS_Completed: BeginEntry(entryTime, writer); WriteKeyValue(writer, "vertex", vertexId); WriteKeyValue(writer, "version", vertexVersion); WriteKeyValue(writer, "transition", "completed"); WriteTimestamp(writer, "utctime", info->m_statistics->m_completionTime); writer->WriteLine(""); break; case DVS_Failed: BeginEntry(entryTime, writer); WriteKeyValue(writer, "vertex", vertexId); WriteKeyValue(writer, "version", vertexVersion); WriteKeyValue(writer, "transition", "failed"); if (info->m_statistics->m_exitStatus == DrError_CohortShutdown) { WriteKeyValue(writer, "reason", "dependent failure"); } else if (info->m_statistics->m_exitStatus == DrError_Unexpected) { WriteKeyValue(writer, "reason", "process terminated"); } else { WriteKeyValue(writer, "reason", info->m_statistics->m_exitStatus.ToString("X8")); } if (status != DrNull) { if (status->GetVertexErrorCode() != S_OK) { WriteKeyValue(writer, "errorcode", status->GetVertexErrorCode().ToString("X8")); } if (status->GetVertexErrorString().GetString() != DrNull) { WriteKeyValue(writer, "errorstring", status->GetVertexErrorString().GetString()); } } WriteTimestamp(writer, "utctime", info->m_statistics->m_completionTime); writer->WriteLine(""); break; default: break; }; if (info->m_state == DVS_Running && status != DrNull) { BeginEntry(entryTime, writer); WriteKeyValue(writer, "vertex", vertexId); WriteKeyValue(writer, "version", vertexVersion); WriteKeyValue(writer, "io", "starting"); DrInputChannelArrayRef inputs = status->GetInputChannels(); WriteKeyValue(writer, "numberOfInputs", inputs->Allocated().ToString()); for (int iChan=0; iChanAllocated(); ++iChan) { DrChannelDescriptionPtr c = inputs[iChan]; HRESULT err = c->GetChannelState(); WriteKeyValue(writer, "uriIn." + iChan.ToString(), c->GetChannelURI().GetString()); } DrOutputChannelArrayRef outputs = status->GetOutputChannels(); WriteKeyValue(writer, "numberOfOutputs", outputs->Allocated().ToString()); for (int oChan=0; oChanAllocated(); ++oChan) { DrChannelDescriptionPtr c = outputs[oChan]; WriteKeyValue(writer, "uriOut." + oChan.ToString(), c->GetChannelURI().GetString()); } writer->WriteLine(""); } if (info->m_state == DVS_RunningStatus || info->m_state == DVS_Completed || info->m_state == DVS_Failed) { if (info->m_statistics->m_totalInputData != DrNull && info->m_statistics->m_totalOutputData != DrNull) { BeginEntry(entryTime, writer); WriteKeyValue(writer, "vertex", vertexId); WriteKeyValue(writer, "version", vertexVersion); if (info->m_state == DVS_RunningStatus) { WriteKeyValue(writer, "io", "running"); } else { WriteKeyValue(writer, "io", "total"); } WriteKeyValue(writer, "totalRead", info->m_statistics->m_totalInputData->m_dataRead.ToString()); WriteKeyValue(writer, "tempRead", info->m_statistics->m_totalInputData->m_tempDataRead.ToString()); WriteKeyValue(writer, "tempReadInRack", info->m_statistics->m_totalInputData->m_tempDataReadCrossMachine.ToString()); WriteKeyValue(writer, "tempReadCrossRack", info->m_statistics->m_totalInputData->m_tempDataReadCrossPod.ToString()); WriteKeyValue(writer, "localRead", info->m_statistics->m_totalLocalInputData.ToString()); WriteKeyValue(writer, "totalWritten", info->m_statistics->m_totalOutputData->m_dataWritten.ToString()); if (info->m_statistics->m_inputData != DrNull) { DrInputChannelArrayRef inputs = DrNull; if (status != DrNull) { inputs = status->GetInputChannels(); } if (info->m_state == DVS_RunningStatus) { for (int iChan=0; iChanm_statistics->m_inputData->Allocated(); ++iChan) { DrInputChannelExecutionStatisticsPtr s = info->m_statistics->m_inputData[iChan]; WriteKeyValue(writer, "rb." + iChan.ToString(), s->m_dataRead.ToString()); if (inputs != DrNull) { DrChannelDescriptionPtr c = inputs[iChan]; WriteKeyValue(writer, "tb." + iChan.ToString(), c->GetChannelTotalLength().ToString()); } } } else { for (int iChan=0; iChanm_statistics->m_inputData->Allocated(); ++iChan) { DrInputChannelExecutionStatisticsPtr s = info->m_statistics->m_inputData[iChan]; WriteKeyValue(writer, "rb." + iChan.ToString(), s->m_dataRead.ToString()); if (inputs != DrNull) { DrChannelDescriptionPtr c = inputs[iChan]; WriteKeyValue(writer, "tb." + iChan.ToString(), c->GetChannelTotalLength().ToString()); } if (s->m_remoteMachine != DrNull) { WriteKeyValue(writer, "rC." + iChan.ToString(), s->m_remoteMachine->GetLocality().GetString()); WriteKeyValue(writer, "rR." + iChan.ToString(), s->m_tempDataReadCrossMachine.ToString()); WriteKeyValue(writer, "rCR." + iChan.ToString(), s->m_tempDataReadCrossPod.ToString()); } } } } if (info->m_statistics->m_outputData != DrNull) { for (int oChan=0; oChanm_statistics->m_outputData->Allocated(); ++oChan) { DrOutputChannelExecutionStatisticsPtr s = info->m_statistics->m_outputData[oChan]; WriteKeyValue(writer, "wb." + oChan.ToString(), s->m_dataWritten.ToString()); } } writer->WriteLine(""); } if ((info->m_state == DVS_Completed || info->m_state == DVS_Failed) && status != DrNull) { DrInputChannelArrayRef inputs = status->GetInputChannels(); for (int iChan=0; iChanAllocated(); ++iChan) { DrChannelDescriptionPtr c = inputs[iChan]; HRESULT err = c->GetChannelState(); if (err != S_OK && err != DrError_EndOfStream) { HRESULT channelErr = c->GetChannelErrorCode(); DrString channelErrString = c->GetChannelErrorString(); BeginEntry(entryTime, writer); WriteKeyValue(writer, "vertex", vertexId); WriteKeyValue(writer, "version", vertexVersion); WriteKeyValue(writer, "inputChannel", iChan.ToString()); WriteKeyValue(writer, "uri", c->GetChannelURI().GetString()); WriteKeyValue(writer, "errorstatus", err.ToString("X8")); if (channelErr != S_OK) { WriteKeyValue(writer, "errorcode", channelErr.ToString("X8")); } if (channelErrString.GetString() != DrNull) { WriteKeyValue(writer, "errorstring", channelErrString.GetString()); } writer->WriteLine(""); } } DrOutputChannelArrayRef outputs = status->GetOutputChannels(); for (int oChan=0; oChanAllocated(); ++oChan) { DrChannelDescriptionPtr c = outputs[oChan]; HRESULT err = c->GetChannelState(); if (err != S_OK && err != DrError_EndOfStream) { HRESULT channelErr = c->GetChannelErrorCode(); DrString channelErrString = c->GetChannelErrorString(); BeginEntry(entryTime, writer); WriteKeyValue(writer, "vertex", vertexId); WriteKeyValue(writer, "version", vertexVersion); WriteKeyValue(writer, "outputChannel", oChan.ToString()); WriteKeyValue(writer, "uri", c->GetChannelURI().GetString()); WriteKeyValue(writer, "errorstatus", err.ToString("X8")); if (channelErr != S_OK) { WriteKeyValue(writer, "errorcode", channelErr.ToString("X8")); } if (channelErrString.GetString() != DrNull) { WriteKeyValue(writer, "errorstring", channelErrString.GetString()); } writer->WriteLine(""); } } } } m_writer->Write(writer->ToString()); } catch (System::Exception^ e) { DrString reason(e->ToString()); DrLogE("Calypso got exception: %s", reason.GetChars()); Discard(); } } void DrCalypsoReporter::ReportFinalTopology(DrVertexPtr vertex, DrResourcePtr runningMachine, DrTimeInterval runningTime) { // doing nothing here }