/*
Copyright (c) Microsoft Corporation
All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0
THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.
See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.
*/
#undef USE_DSC
#undef USE_TIDYFS
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.Research.Tools;
namespace Microsoft.Research.JobObjectModel
{
/// <summary>
/// Information discoverable statically by inspecting a partitioned table.
/// </summary>
public class StaticPartitionedTableInformation
{
    /// <summary>
    /// Display name: the file-name part of the URI for tables built from a URI,
    /// or a description of the producing stage for tables built by <see cref="StageOutput"/>.
    /// </summary>
    public string Name { get; protected set; }

    /// <summary>
    /// Provider that hosts this partitioned table.
    /// </summary>
    public string UriType { get; protected set; }

    /// <summary>
    /// Options part of the URI, i.e. the text following the first '?'; empty if the URI carries no options.
    /// Only set by the URI-based constructor.
    /// </summary>
    public string Options { get; protected set; }

    /// <summary>
    /// Basic URI, with the options removed.
    /// Only set by the URI-based constructor.
    /// </summary>
    public string Uri { get; protected set; }

    /// <summary>
    /// Number of partitions. Could be -1 if the number of partitions is unknown.
    /// </summary>
    public long PartitionCount { get; internal set; }

    /// <summary>
    /// Estimated size of the contents; could be -1 if the size is unknown.
    /// </summary>
    public long EstimatedSize { get; internal set; }

    /// <summary>
    /// In case of error this is the message; empty when no error was recorded.
    /// </summary>
    public string Error { get; internal set; }

    /// <summary>
    /// Short name of the table.
    /// </summary>
    public string Header { get; protected set; }

    /// <summary>
    /// Cluster where the job accessing the stream resides.
    /// </summary>
    private ClusterConfiguration Config { get; set; }

    /// <summary>
    /// Backing store for <see cref="Partitions"/>.
    /// </summary>
    private List<StaticPartitionInformation> partitions;

    /// <summary>
    /// Static information about each partition.
    /// </summary>
    public struct StaticPartitionInformation
    {
        /// <summary>
        /// Number of the partition.
        /// </summary>
        public int PartitionNumber { get; private set; }

        /// <summary>
        /// Size of partition.
        /// </summary>
        public long PartitionSize { get; private set; }

        /// <summary>
        /// Copies of the replica: either a number or one machine name.
        /// </summary>
        public string Copies { get; private set; }

        /// <summary>
        /// Create information about a partition that is described by its replica count.
        /// </summary>
        /// <param name="number">Partition number.</param>
        /// <param name="size">Partition size.</param>
        /// <param name="replicas">Number of replicas.</param>
        public StaticPartitionInformation(int number, long size, int replicas)
            : this()
        {
            this.PartitionNumber = number;
            this.PartitionSize = size;
            this.Copies = replicas.ToString();
        }

        /// <summary>
        /// Create information about a partition that has a single, named replica.
        /// </summary>
        /// <param name="number">Partition number.</param>
        /// <param name="size">Partition size.</param>
        /// <param name="location">Location of the unique replica.</param>
        public StaticPartitionInformation(int number, long size, string location)
            : this()
        {
            this.PartitionNumber = number;
            this.PartitionSize = size;
            this.Copies = location;
        }
    }

    /// <summary>
    /// Information about the partitions.
    /// </summary>
    public IEnumerable<StaticPartitionInformation> Partitions { get { return this.partitions; } }

    /// <summary>
    /// Add a new partition.
    /// </summary>
    /// <param name="spi">Partition to add.</param>
    public void AddPartition(StaticPartitionInformation spi)
    {
        this.partitions.Add(spi);
    }

    /// <summary>
    /// Save here information about the constructor invocation arguments; used by Refresh().
    /// </summary>
    private class SaveConstructorArguments
    {
        public string[] code;
        public DryadJobStaticPlan plan;
        public DryadJobStaticPlan.Stage source;
    }

    /// <summary>
    /// Save here information about the constructor invocation arguments.
    /// </summary>
    private SaveConstructorArguments constructorArguments;

    /// <summary>
    /// Code attached to the stage (if any). This is null for tables created by <see cref="StageOutput"/>,
    /// which records the plan and the stage instead of the code.
    /// </summary>
    public string[] Code { get { return this.constructorArguments.code; } }

    /// <summary>
    /// Empty information about a partitioned table; establishes the defaults shared by all construction paths.
    /// </summary>
    private StaticPartitionedTableInformation()
    {
        this.constructorArguments = null;
        this.Config = null;
        this.UriType = "";
        this.Error = "";
        this.Name = "";
        this.Header = "";
        this.PartitionCount = -1;
        this.partitions = new List<StaticPartitionInformation>();
        this.EstimatedSize = -1;
    }

    /// <summary>
    /// Build the saved arguments for a table created by <see cref="StageOutput"/>.
    /// The code is deliberately left null: that is how Refresh() recognises a stage output.
    /// </summary>
    /// <param name="plan">Static plan of the job.</param>
    /// <param name="source">Stage in the job which produces the data.</param>
    /// <returns>The arguments to remember for a later Refresh().</returns>
    private static SaveConstructorArguments StageArguments(DryadJobStaticPlan plan, DryadJobStaticPlan.Stage source)
    {
        return new SaveConstructorArguments
        {
            code = null,
            source = source,
            plan = plan
        };
    }

    /// <summary>
    /// Create a class representing the set of edges between two stages in the job plan.
    /// These collectively look like a partitioned table.
    /// If the source stage has multiple outputs there is not enough information to return meaningful information.
    /// </summary>
    /// <param name="job">Job whose slice we are displaying.</param>
    /// <param name="plan">Static plan of the job.</param>
    /// <param name="source">Stage in the job which produces the data.</param>
    /// <param name="status">Delegate used to report errors.</param>
    /// <param name="showCancelled">If true include cancelled vertices.</param>
    /// <returns>
    /// The table description. Problems that prevent gathering details are reported through
    /// <see cref="Error"/> on the returned object rather than by throwing.
    /// </returns>
    /// <exception cref="DryadException">A Tee stage does not have exactly one input.</exception>
    public static StaticPartitionedTableInformation StageOutput(
        DryadLinqJobInfo job,
        DryadJobStaticPlan plan,
        DryadJobStaticPlan.Stage source,
        StatusReporter status,
        bool showCancelled)
    {
        // The header names the stage the caller asked about, even if the walk below
        // ends up describing a stage further upstream.
        string header = "Output of " + source.Name;

        // First check whether in the static plan this is virtual:
        // a Tee stage is replaced by its single input, repeatedly.
        while (source.IsTee)
        {
            var sourceInputs = plan.GetStageConnections(source, true).ToList();
            if (sourceInputs.Count != 1)
                throw new DryadException("Unexpected number of inputs for stage " + source.Name);
            source = sourceInputs[0].From;
        }

        // If we reached the input return information about that input
        if (source.IsInput)
        {
            status("Scanning " + source.Name, StatusKind.LongOp);
            StaticPartitionedTableInformation input = new StaticPartitionedTableInformation(job.ClusterConfiguration, source.UriType, source.Uri, source.Code, status);
            // 'header' already carries the "Output of " prefix; do not add it a second time.
            input.Header = header;
            // Overrides the arguments saved by the constructor so that Refresh() comes back through StageOutput.
            input.constructorArguments = StageArguments(plan, source);
            return input;
        }

        StaticPartitionedTableInformation result = new StaticPartitionedTableInformation();
        result.Name = "Output of vertices in stage " + source.Name;
        result.Header = header;
        result.constructorArguments = StageArguments(plan, source);

        // Check whether this stage has multiple outputs; this can only happen for 'Fork' operators.
        var destinations = plan.GetStageConnections(source, false);
        if (destinations.Count() > 1)
        {
            result.Error = "Cannot provide information about one of multiple outputs of a stage.";
            return result;
        }

        DryadLinqJobStage stage = job.GetStage(source.Name);
        if (stage == null)
        {
            result.Error = "There is no information about the output of stage " + source.Name;
            return result;
        }

        result.EstimatedSize = 0;
        result.PartitionCount = stage.TotalInitiatedVertices;
        int count = 0;
        foreach (ExecutedVertexInstance vi in stage.Vertices)
        {
            bool include =
                vi.State == ExecutedVertexInstance.VertexState.Successful ||
                vi.State == ExecutedVertexInstance.VertexState.Failed ||
                (showCancelled && vi.State == ExecutedVertexInstance.VertexState.Cancelled);
            if (!include)
                continue;

            // Each reported vertex becomes one partition, numbered in enumeration order.
            StaticPartitionInformation spi = new StaticPartitionInformation(count++, vi.DataWritten, vi.Name + " v." + vi.Version);
            result.AddPartition(spi);
            // Negative sizes stand for "unknown" and must not reduce the estimate
            // (same rule as ParsePartitionedFile).
            if (vi.DataWritten >= 0)
                result.EstimatedSize += vi.DataWritten;
        }
        return result;
    }

    /// <summary>
    /// Create a class to discover the information about a partitioned table given its uri.
    /// </summary>
    /// <param name="config">Cluster where the job accessing the stream resides.</param>
    /// <param name="uriType">Type of URI.</param>
    /// <param name="uri">Partitioned table uri.</param>
    /// <param name="code">Code associated to the stage (the uri does not contain the options, they may still be in the code). A null value is treated as an empty array.</param>
    /// <param name="statusReporter">Delegate used to report errors.</param>
    public StaticPartitionedTableInformation(ClusterConfiguration config, string uriType, string uri, string[] code, StatusReporter statusReporter)
        : this()
    {
        // Treat missing code as "no code". Keeping a non-null array also keeps
        // Refresh() on the URI-based path for this object.
        if (code == null)
            code = new string[0];

        this.Config = config;
        this.UriType = uriType;
        this.constructorArguments = new SaveConstructorArguments
        {
            // this is all we need
            code = code
        };

        // really ugly, but the uri in the table no longer contains the options; they were stripped by the DryadLINQ compiler.
        if (code.Length > 0)
        {
            string firstline = code[0].Trim('[', ']');
            const string prefix = "PartitionedTable:";
            // URIs and the marker are machine text: compare ordinally, not by culture.
            if (firstline.StartsWith(prefix, StringComparison.Ordinal))
                firstline = firstline.Substring(prefix.Length).Trim();
            if (firstline.StartsWith(uri, StringComparison.Ordinal))
                uri = firstline; // this may contain the options.
        }

        // Split "base?options"; a '?' in the very first position is not treated as a separator.
        int indexoptions = uri.IndexOf('?');
        if (indexoptions > 0)
        {
            this.Uri = uri.Substring(0, indexoptions);
            this.Options = uri.Substring(indexoptions + 1);
        }
        else
        {
            this.Uri = uri;
            this.Options = "";
        }

        string fileName = Path.GetFileName(this.Uri);
        this.Name = fileName;
        this.Header = fileName;

        // PartitionCount, EstimatedSize and the partition list keep the defaults
        // set by the parameterless constructor unless a provider fills them in.
        switch (uriType)
        {
            case "PartitionedFile":
                this.ParsePartitionedFile(statusReporter);
                break;
        }
    }

    /// <summary>
    /// Generate a new view of the same partitioned table.
    /// </summary>
    /// <param name="job">Job containing the table.</param>
    /// <param name="statusReporter">Delegate used to report errors.</param>
    /// <param name="showCancelled">Show the cancelled vertices.</param>
    /// <returns>The new view.</returns>
    public StaticPartitionedTableInformation Refresh(DryadLinqJobInfo job, StatusReporter statusReporter, bool showCancelled)
    {
        // Saved code means this object came from the URI-based constructor;
        // otherwise it was produced by StageOutput, which saved the plan and stage.
        if (this.constructorArguments.code != null)
            return new StaticPartitionedTableInformation(this.Config, this.UriType, this.Uri, this.constructorArguments.code, statusReporter);
        else
            return StageOutput(job, this.constructorArguments.plan, this.constructorArguments.source, statusReporter, showCancelled);
    }

    /// <summary>
    /// The partitioned table is a partitioned file: read its metadata and fill in the
    /// partition count, the partition list and the estimated size.
    /// On failure <see cref="Error"/> is set and <see cref="EstimatedSize"/> stays at -1 (unknown).
    /// </summary>
    /// <param name="statusReporter">Delegate used to report errors.</param>
    private void ParsePartitionedFile(StatusReporter statusReporter)
    {
        try
        {
            if (!File.Exists(this.Uri))
            {
                this.Error = "File not found";
                statusReporter("Cannot find file " + this.Uri, StatusKind.Error);
                return;
            }

            PartitionedFileMetadata pfi = new PartitionedFileMetadata(new UNCPathname(this.Uri));
            this.PartitionCount = pfi.NumberOfPartitions;

            // Accumulate locally so that a failure part-way through does not
            // publish a misleading partial (or zero) size.
            long total = 0;
            foreach (var p in pfi.Partitions)
            {
                StaticPartitionInformation spi = new StaticPartitionInformation(p.Number, p.Size, p.NumberOfReplicas);
                this.partitions.Add(spi);
                // Negative sizes are not counted towards the estimate.
                if (spi.PartitionSize >= 0)
                    total += spi.PartitionSize;
            }
            this.EstimatedSize = total;
        }
        catch (Exception ex)
        {
            // Best effort: record the failure for the caller instead of propagating it.
            this.Error = ex.Message;
        }
    }
}
}