diff --git a/ClusterInterface/ClusterInterface.csproj b/ClusterInterface/ClusterInterface.csproj index 64a32ea..e2f16ff 100644 --- a/ClusterInterface/ClusterInterface.csproj +++ b/ClusterInterface/ClusterInterface.csproj @@ -1,6 +1,6 @@ - + - + Debug @@ -51,15 +51,36 @@ False - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll - + False - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.dll + + + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.dll + + + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll + + + False + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.dll + + + False + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll + + ..\packages\Microsoft.WindowsAzure.Management.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.dll + False ..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll @@ -72,17 +93,29 @@ False ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll + + False + ..\packages\Microsoft.WindowsAzure.Management.Storage.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll + False ..\packages\WindowsAzure.Storage.3.1.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll - - False + 
..\packages\Newtonsoft.Json.6.0.2\lib\net45\Newtonsoft.Json.dll + + + + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Extensions.dll + + + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Primitives.dll + + False ..\packages\System.Spatial.5.6.1\lib\net40\System.Spatial.dll @@ -105,14 +138,19 @@ + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - + + - + - + \ No newline at end of file diff --git a/ClusterInterface/Properties/AssemblyInfo.cs b/ClusterInterface/Properties/AssemblyInfo.cs index eab0852..ace9a43 100644 --- a/ClusterInterface/Properties/AssemblyInfo.cs +++ b/ClusterInterface/Properties/AssemblyInfo.cs @@ -51,5 +51,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.1.0.0")] -[assembly: AssemblyFileVersion("0.1.0.0")] +[assembly: AssemblyVersion("0.1.2.0")] +[assembly: AssemblyFileVersion("0.1.2.0")] diff --git a/ClusterInterface/packages.config b/ClusterInterface/packages.config index 6cc3081..bd29d67 100644 --- a/ClusterInterface/packages.config +++ b/ClusterInterface/packages.config @@ -1,13 +1,21 @@ - + + + + - + + + + + + - + \ No newline at end of file diff --git a/DryadLinqGraphManager/DryadLinqGraphManager.csproj b/DryadLinqGraphManager/DryadLinqGraphManager.csproj index f7e3948..2619ed3 100644 --- a/DryadLinqGraphManager/DryadLinqGraphManager.csproj +++ b/DryadLinqGraphManager/DryadLinqGraphManager.csproj @@ -1,6 +1,6 @@ - + - + Debug AnyCPU @@ -71,15 +71,39 @@ False - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll + 
..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll - + False - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll + + + False + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.dll + + + False + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll + + False + ..\packages\Microsoft.WindowsAzure.Management.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.dll + False ..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll @@ -92,6 +116,10 @@ False ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll + + False + ..\packages\Microsoft.WindowsAzure.Management.Storage.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll + False ..\packages\WindowsAzure.Storage.3.1.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll @@ -108,6 +136,17 @@ 3.5 + + + + False + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Extensions.dll + + + False + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Primitives.dll + + False ..\packages\System.Spatial.5.6.1\lib\net40\System.Spatial.dll @@ -164,14 +203,19 @@ + + + + + This project references NuGet package(s) that are missing on this computer. 
Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - + + - + - + \ No newline at end of file diff --git a/DryadLinqGraphManager/Properties/AssemblyInfo.cs b/DryadLinqGraphManager/Properties/AssemblyInfo.cs index ea2eb9c..bc4e5c4 100644 --- a/DryadLinqGraphManager/Properties/AssemblyInfo.cs +++ b/DryadLinqGraphManager/Properties/AssemblyInfo.cs @@ -51,5 +51,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.1.0.0")] -[assembly: AssemblyFileVersion("0.1.0.0")] +[assembly: AssemblyVersion("0.1.2.0")] +[assembly: AssemblyFileVersion("0.1.2.0")] diff --git a/DryadLinqGraphManager/packages.config b/DryadLinqGraphManager/packages.config index 6cc3081..bd29d67 100644 --- a/DryadLinqGraphManager/packages.config +++ b/DryadLinqGraphManager/packages.config @@ -1,13 +1,21 @@ - + + + + - + + + + + + - + \ No newline at end of file diff --git a/DryadLinqTests/ApplyAndForkTests.cs b/DryadLinqTests/ApplyAndForkTests.cs new file mode 100644 index 0000000..b523841 --- /dev/null +++ b/DryadLinqTests/ApplyAndForkTests.cs @@ -0,0 +1,270 @@ +using Microsoft.Research.DryadLinq; +using Microsoft.Research.Peloponnese.Storage; +using System; +using System.Collections.Generic; +using System.Linq; + +namespace DryadLinqTests +{ + public class ApplyAndForkTests + { + public static IEnumerable NonHomomorphic_Unary_Func(IEnumerable input) + { + return input; + } + + // [DistributiveOverConcat] + public static IEnumerable Homomorphic_Unary_Func(IEnumerable input) + { + return input; + } + + public static IEnumerable NonHomomorphic_Binary_Func(IEnumerable left, IEnumerable right) + { + return left; + } + + // [LeftDistributiveOverConcat] + public static IEnumerable LeftHomomorphic_Binary_Func(IEnumerable left, IEnumerable 
right) + { + return left; + } + + // Note: an apply function must only consume each enumerable once, and it must produce an enumerable + // So for a simple pass-through function that does a little work, we must enumerate only once. + // Else we get the error: "An HpcLinq channel can't be read more than once." + // [DistributiveOverConcat] + public static IEnumerable FullHomomorphic_Binary_Func(IEnumerable left, IEnumerable right) + { + long cLeft = 0; + foreach (int x in left) + { + cLeft++; + yield return x; + } + + long cRight = 0; + foreach (int x in right) + { + cRight++; + yield return x; + } + + if (cLeft == 0) + throw new Exception("a node received empty left-data"); + + if (cRight == 0) + throw new Exception("a node received empty right-data"); + } + + + public static bool NonHomomorphicUnaryApply() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/NonHomomorphicUnaryApply.out"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + var q1 = pt1.ApplyPerPartition(x => NonHomomorphic_Unary_Func(x)); + var jobInfo = q1.ToStore(outFile).Submit(); + jobInfo.Wait(); + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool HomomorphicUnaryApply() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/HomomorphicUnaryApply.out"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + 
IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + var q1 = pt1.ApplyPerPartition(x => Homomorphic_Unary_Func(x)); + var jobInfo = q1.ToStore(outFile).Submit(); + jobInfo.Wait(); + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool NonHomomorphicBinaryApply() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/NonHomomorphicBinaryApply.out"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + var q1 = pt1.Apply(pt1, (x, y) => NonHomomorphic_Binary_Func(x, y)); + var jobInfo = q1.ToStore(outFile).Submit(); + jobInfo.Wait(); + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool LeftHomomorphicBinaryApply() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/LeftHomomorphicBinaryApply.out"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + var q1 = pt1.ApplyPerPartition(pt1, (x, y) => LeftHomomorphic_Binary_Func(x, y), true); + var jobInfo = q1.ToStore(outFile).Submit(); + jobInfo.Wait(); + + passed &= 
Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool FullHomomorphicBinaryApply_DifferentDataSets() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/FullHomomorphicBinaryApply_DifferentDataSets.out"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + IQueryable pt2 = simple.Select(x => x.First()); + + var q1 = pt1.ApplyPerPartition(pt2, (x, y) => FullHomomorphic_Binary_Func(x, y), false); + var jobInfo = q1.ToStore(outFile).Submit(); + jobInfo.Wait(); + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool FullHomomorphicBinaryApply_IdenticalDataSets() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/FullHomomorphicBinaryApply_2.out"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + var q1 = pt1.ApplyPerPartition(pt1, (x, y) => FullHomomorphic_Binary_Func(x, y), false); + var jobInfo = q1.ToStore(outFile).Submit(); + jobInfo.Wait(); + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + 
+ [Associative(typeof(AssociativeRecursive1))] + public static string IntToStringCSVAggregator(string agg, int next) + { + return agg + "," + next.ToString(); + } + public class AssociativeRecursive1 : IAssociative + { + public string Seed() + { + return ""; + } + public string RecursiveAccumulate(string first, string second) + { + return first + second; + } + } + + public static bool Aggregate_WithCombiner() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + string q1 = pt1.Aggregate("", (str, x) => IntToStringCSVAggregator(str, x)); + + passed &= (q1.Length == 27); // string should have numbers 1..12 separated by commas + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + } +} diff --git a/DryadLinqTests/BasicAPITests.cs b/DryadLinqTests/BasicAPITests.cs new file mode 100644 index 0000000..532a127 --- /dev/null +++ b/DryadLinqTests/BasicAPITests.cs @@ -0,0 +1,1087 @@ +using Microsoft.Research.DryadLinq; +using Microsoft.Research.Peloponnese.Storage; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Linq.Expressions; + +namespace DryadLinqTests +{ + public class BasicAPITests + { + public static bool ToStoreThrowsForNonQuery() + { + bool passed = true; + try + { + int[] data = new[] { 1, 2, 3 }; + var q1 = data.AsQueryable().Select(x => 100 + x).ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "dummy")).ToArray(); + //Should throw as we got into DryadLinq via AsQueryable() rather than via context. 
+ passed &= false; + } + catch (ArgumentException) + { + //expected + } + return passed; + } + + public static bool ToStoreGetEnumeratorThrows() // pass + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/ToStoreGetEnumeratorThrows.txt"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + + IQueryable pt1 = simple.Select(x => x.First()); + IQueryable q1 = pt1.Select(x => 100 + x); + + var output = q1.ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, outFile), true); + output.GetEnumerator(); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool GetEnumeratorNonToStoreTerminated() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + IQueryable q1 = pt1.Select(x => 100 + x); + IQueryable q2 = q1.Where(x => true); + foreach (int x in q2) // throws here + { + //Console.WriteLine(x); + } + //@TODO: perform a sequence-equals test. 
+ + //IQueryable format = q2.Select(x => new LineRecord(String.Format("{0}", x))); + //DryadLinqJobInfo output = format.ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + // "unittest/output/test2.txt")).SubmitAndWait(); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool ToStoreSubmitGetEnumerator() // pass + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/ToStoreSubmitGetEnumerator.txt"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + var q1 = pt1.Select(x => 100 + x).HashPartition(x => x); + var q2 = q1.Where(x => true); + IQueryable output = q2.ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, outFile), true); + DryadLinqJobInfo info = output.SubmitAndWait(); + + foreach (int x in output) // should not run a new dryad job. 
+ { + //Console.WriteLine(x); + } + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool SubmitNonToStoreTerminated() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + var q1 = pt1.Select(x => 100 + x); + var q2 = q1.Where(x => true); + q2.SubmitAndWait(); // throws here + var outPT = q2.ToList(); + foreach (int x in outPT) + { + //Console.WriteLine(x); + } + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool MaterializeToStoreTerminated() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile_a = "unittest/output/MaterializeToStoreTerminated_a.txt"; + string outFile_b = "unittest/output/MaterializeToStoreTerminated_b.txt"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + IQueryable query = pt1.Select(x => 100 + x); + + var q1 = query.ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, outFile_a), true); //stream name w/o prefixed slash + + var q2 = query.Where(x => true).ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, outFile_b), true); //stream name w/ prefixed slash + + DryadLinqQueryable.Submit(q1, q2); //materialize // throws + + var __unused2 = q1.Select(x => x); // Legal call, but 
BLOCKS + foreach (int x in q2) + { + //Console.WriteLine(x); + } + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile_a); + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile_b); + + //@TODO: assert that only one query execution occurred. + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool MaterializeNonToStoreTerminated() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + IQueryable query = pt1.Select(x => 100 + x); + + DryadLinqQueryable.Submit(query); //materialize // throws + + foreach (int x in query) + { + //Console.WriteLine(x); + } + + //@TODO: assert that only one query execution occurred. 
+ } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool EnumeratePlainData() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + foreach (int x in pt1) // throws + { + //Console.WriteLine(x); + } + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool CopyPlainDataViaToStoreSubmit() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/CopyPlainDataViaToStoreSubmit.txt"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + var q = pt1.ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, outFile), true); + DryadLinqJobInfo info = q.Submit(); + info.Wait(); + + foreach (int x in q) + { + //Console.WriteLine(x); + } + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool CopyPlainDataViaToStoreMaterialize() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/CopyPlainDataViaToStoreMaterialize.txt"; + + IQueryable input = 
context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + var q = pt1.ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, outFile), true); + DryadLinqJobInfo info = DryadLinqQueryable.Submit(q); + info.Wait(); + + foreach (int x in q) + { + //Console.WriteLine(x); + } + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + /* + public static bool PlainEnumerableAsDryadQueryToStoreSubmit() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/PlainEnumerableAsDryadQueryToStoreSubmit.txt"; + + int[] plainData = { 5, 6, 7 }; + + var q = context.AsDryadQuery(plainData, CompressionScheme.None).ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, outFile); + DryadLinqJobInfo info = q.Submit(); + info.Wait(); + + foreach (int x in q) + { + //Console.WriteLine(x); + } + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + } + catch (DryadLinqException e) + { + passed &= false; + } + return passed; + } + */ + public static bool RepeatSubmit() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/RepeatSubmit.txt"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => 
x.First()); + + + var q = pt1.ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, outFile), true); + DryadLinqJobInfo info1 = null; + DryadLinqJobInfo info2 = null; + try + { + info1 = q.Submit(); + info2 = q.Submit(); // does not throw + + if (!context.LocalDebug) + { + passed &= false; + } + } + catch (ArgumentException) + { + passed &= true; + } + + //wait for any jobs to complete. + if (info1 != null) + { + info1.Wait(); + } + + if (info2 != null) + { + info2.Wait(); + } + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool RepeatMaterialize() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/RepeatMaterialize.txt"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + var q = pt1.ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, outFile), true); + DryadLinqJobInfo info1 = null; + DryadLinqJobInfo info2 = null; + try + { + info1 = DryadLinqQueryable.Submit(new[] { q }); //materialize + info2 = DryadLinqQueryable.Submit(new[] { q }); //materialize // does not throw + + if (!context.LocalDebug) + { + passed &= false; + } + } + catch (ArgumentException) + { + passed &= true; + } + + //wait for any jobs to complete. 
+ if (info1 != null) + { + info1.Wait(); + } + + if (info2 != null) + { + info2.Wait(); + } + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool MaterializeMentionsSameQueryTwice() // pass + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/MaterializeMentionsSameQueryTwice.txt"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + var q = pt1.ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, outFile), true); + DryadLinqJobInfo info1 = null; + try + { + info1 = DryadLinqQueryable.Submit(q, q); //materialize // throws + passed &= false; // for Config.cluster execution, second materialize should throw; + } + catch (ArgumentException) + { + passed &= true; + } + + //wait for any jobs to complete. 
+ if (info1 != null) + { + info1.Wait(); + } + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool QueryOnDataBackedDLQ() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/QueryOnDataBackedDLQ.txt"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + var q = pt1.Select(x => 100 + x); + var outPT = q.ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, outFile), true); + outPT.Submit(); + + var outPT2_dummy_notUsed = outPT.Select(x => x); //BLOCKS HERE until the input is concrete + // source.Expression returns an expression for the backingDataDLQ + // CheckAndInitialize() on the backingData will block. 
+ + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + + foreach (int x in outPT) + { + //Console.WriteLine(x); + } + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool Bug11781_CountandFirstOrDefault() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/Bug11781.out"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + //Test Count() + var c = pt1.Count(); + + //Test CountAsQuery() + var q = pt1.CountAsQuery().ToStore(outFile); + DryadLinqJobInfo info = q.Submit(); + info.Wait(); + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + + // Also test FirstOrDefault + // the affected code for dlq.Execute() also has a branch for FirstOrDefault() and friends. 
+ int y = pt1.FirstOrDefault(); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool Bug11782_Aggregate() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/Bug11782_Aggregate.out"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + //test Aggregate() + var c = pt1.Select(x => x).Aggregate((x, y) => x + y); + + //test AggregateAsQuery() + var q = pt1.Select(x => x).AggregateAsQuery((x, y) => x + y).ToStore(outFile); + DryadLinqJobInfo info = DryadLinqQueryable.Submit(q); + info.Wait(); + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool Bug11782_LowLevelQueryableManipulation() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + Expression lambda = Expression.Lambda>( + Expression.Constant(1), + new[] { Expression.Parameter(typeof(int), "x") }); + var z = pt1.Provider.CreateQuery( + Expression.Call( + typeof(Queryable), "Select", + new Type[] { pt1.ElementType, pt1.ElementType }, + pt1.Expression, Expression.Quote(lambda))); + + passed &= false; // the use of non-generic Provider.CreateQuery() should have thrown + } + catch (DryadLinqException) + { + passed &= true; + } + return passed; + 
} + + public static bool Bug11638_LongWhere() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/BasicAPITests_LongWhere.out"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + var q = pt1.Select(x => 100 + x); + var outPT = q.LongWhere((x, i) => true).ToStore(outFile); + var info = outPT.Submit(); + info.Wait(); + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool AssumeRangePartition() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/BasicAPITests_AssumeRangePartition.out"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + var q = + pt1 + .AssumeRangePartition(x => x, false) + .Select(x => 100 + x).ToStore(outFile); + var info = q.Submit(); + info.Wait(); + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool Bug11638_LongMethods() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/Bug11638_LongMethods.out"; + + IQueryable input = 
context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + var q = + pt1 + .LongSelect((x, i) => x) + .LongWhere((x, i) => true) + .LongSelectMany((x, i) => new[] { x }) + .LongSelectMany((x, i) => new[] { x }, (i, seq) => seq) //overload#2 + .LongTakeWhile((x, i) => true) + .LongSkipWhile((x, i) => false) + .ToStore(outFile); + var info = q.Submit(); + info.Wait(); + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool ContextConfigIsReadOnly() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + + try + { + string jobName = context.JobFriendlyName; + context.JobFriendlyName = "bob"; + context.JobFriendlyName = jobName; + } + catch (NotSupportedException) + { + passed &= false; // "an exception should not thrown"; + } + + try + { + context.JobMinNodes = 120; + passed &= false; // "an exception should not thrown"; + } + catch (NotSupportedException) + { + //expected + } + + try + { + context.ResourcesToAdd.Add("blah"); + passed &= false; // "an exception should not thrown"; + } + catch (NotSupportedException) + { + //expected + } + + try + { + context.ResourcesToRemove.Add("blah"); + passed &= false; // "an exception should not thrown"; + } + catch (NotSupportedException) + { + //expected + } + + try + { + context.JobEnvironmentVariables.Add("bob", "bob"); + passed &= false; // "an exception should not thrown"; + } + catch (NotSupportedException) + { + //expected + } + + try + { + context.EnableSpeculativeDuplication = false; + passed &= false; // "an exception should not thrown"; + } + catch (NotSupportedException) + { + //expected + } + + return passed; + 
} + + public static bool ToggleSpeculativeDuplication() + { + var context = Utils.MakeBasicConfig(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + passed &= !context.EnableSpeculativeDuplication; // "Speculative Duplication enabled by default" + context.EnableSpeculativeDuplication = true; + passed &= context.EnableSpeculativeDuplication; // "Failed to enable speculative duplication" + context.EnableSpeculativeDuplication = false; + passed &= !context.EnableSpeculativeDuplication; // "Failed to disable speculative duplication" + context.EnableSpeculativeDuplication = false; + // ??? DryadLinqContext testContext = new DryadLinqContext(context); + // ??? passed &= !testContext.EnableSpeculativeDuplication; // "Speculative Duplication enabled after copy" + + } + catch (DryadLinqException) + { + passed &= false; // "Enabling and disabling speculative duplication should not throw" + } + return passed; + } + + public static bool Bug15068_ConfigResourcesAPI() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + context.HeadNode = "MIKELID7"; // ??? + passed &= (context.ResourcesToAdd.IsReadOnly == false); // "isReadOnly should be false" + passed &= (context.ResourcesToRemove.IsReadOnly == false); // "isReadOnly should be false" + + //clear + context.ResourcesToAdd.Clear(); + context.ResourcesToRemove.Clear(); + + //add + context.ResourcesToAdd.Add("abc"); + context.ResourcesToRemove.Add("def"); + context.ResourcesToRemove.Add("ghi"); + + //index, count, getEnumerator + passed &= (context.ResourcesToAdd[0] == "abc"); // "wrong value" + passed &= (context.ResourcesToAdd.Count == 1); // "wrong value" + + passed &= (context.ResourcesToRemove[0] == "def"); // "wrong value" + passed &= (context.ResourcesToRemove.Where((x, i) => (i == 1)).First() == "ghi"); // "wrong value" + passed &= (context.ResourcesToRemove.Count == 2); // "wrong value" + + // ??? + //// read-only. 
+ //DryadLinqContext ctx = new DryadLinqContext(context); + //passed &= (ctx.ResourcesToAdd.IsReadOnly == true); // "isReadOnly should be true" + //passed &= (ctx.ResourcesToRemove.IsReadOnly == true); // "isReadOnly should be true" + + // clone was taken. + context.ResourcesToAdd.Clear(); + context.ResourcesToRemove.Clear(); + // ??? + //passed &= (ctx.ResourcesToAdd.Count == 1); // "should be unaffected" + //passed &= (ctx.ResourcesToRemove.Count == 2); // "should be unaffected" + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool Bug14449_ContextShouldExposeVersionIDs() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + // ??? + //passed &= (context.Major >= 3); // "problem with HpcLinq client version" + //passed &= (context.Major >= 3); // "problem with HpcLinq server version" + + //passed &= (context.ClientVersion.Major >= 3); // "problem with Dsc client version" + //passed &= (context.ServerVersion.Major >= 3); // "problem with Dsc server version" + + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool Bug_16341_SubmitThrowsForDifferentContexts() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + var context2 = new DryadLinqContext(Config.cluster); + context2.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + IQueryable input2 = context2.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple2 = input2.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt2 = 
simple2.Select(x => x.First()); + + DryadLinqQueryable.Submit(pt1, pt2); + passed &= false; + } + catch (DryadLinqException) + { + } + + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + IQueryable input2 = context2.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple2 = input2.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt2 = simple2.Select(x => x.First()); + + DryadLinqQueryable.SubmitAndWait(pt1, pt2); + passed &= false; + } + catch (DryadLinqException) + { + } + + return passed; + } + + public static bool Bug_16341_VariousTestsForSubmit() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + int[] data = new[] { 1, 2, 3 }; + var badQ1 = data.AsQueryable().Select(x => 100 + x); + var badQ2 = data.AsQueryable().Select(x => 100 + x); + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable goodQ1 = simple.Select(x => x.First()); + + IQueryable input_copy = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple_copy = input_copy.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable goodQ2 = simple_copy.Select(x => x.First()); + + + try + { + badQ1.Submit(); + passed &= false; // "should throw as input isn't a L2H query" + } + catch (ArgumentException) + { + } + + try + { + DryadLinqQueryable.Submit((IQueryable)null); //this-Query overload + 
passed &= false; // "should throw ArgNull as input is null" + } + catch (ArgumentException) + { + //although we pass null, it goes to params[] overload which creates an actual array[1] containing one null + //hence we throw ArgumentException rather than ArgumentNullException. + } + + try + { + DryadLinqQueryable.Submit((IQueryable[])null); //multi-query overload + passed &= false; // "should throw ArgNull as input is null" + } + catch (ArgumentNullException) + { + } + + try + { + DryadLinqQueryable.Submit(goodQ1, null); //multi-query overload + passed &= false; // "should throw ArgEx as one of the inputs is null" + } + catch (ArgumentException) + { + } + + try + { + DryadLinqQueryable.Submit(goodQ1, badQ1); //multi-query overload + passed &= false; // "should throw ArgEx as one of the inputs is not a L2H" + } + catch (ArgumentException) + { + } + + //---------- + // same tests again for SubmitAndWait + + try + { + badQ1.SubmitAndWait(); + passed &= false; // "should throw as input isn't a L2H query" + } + catch (ArgumentException) + { + } + + try + { + DryadLinqQueryable.SubmitAndWait((IQueryable)null); //this-Query overload + passed &= false; // "should throw ArgNull as input is null" + } + catch (ArgumentException) + { + //although we pass null, it goes to params[] overload which creates an actual array[1] containing one null + //hence we throw ArgumentException rather than ArgumentNullException. 
+ } + + try + { + DryadLinqQueryable.SubmitAndWait((IQueryable[])null); //multi-query overload + passed &= false; // "should throw ArgNull as input is null" + } + catch (ArgumentNullException) + { + } + + try + { + DryadLinqQueryable.SubmitAndWait(goodQ1, null); //multi-query overload + passed &= false; // "should throw ArgEx as one of the inputs is null" + } + catch (ArgumentException) + { + } + + try + { + DryadLinqQueryable.SubmitAndWait(goodQ1, badQ1); //multi-query overload + passed &= false; // "should throw ArgEx as one of the inputs is not a L2H" + } + catch (ArgumentException) + { + } + + } + catch (DryadLinqException) + { + } + return passed; + } + + + public static bool template() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string outFile = "unittest/output/x.txt"; + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile); + + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + } + +} diff --git a/DryadLinqTests/DryadLinqTests.csproj b/DryadLinqTests/DryadLinqTests.csproj index 74f321f..c88877b 100644 --- a/DryadLinqTests/DryadLinqTests.csproj +++ b/DryadLinqTests/DryadLinqTests.csproj @@ -1,6 +1,6 @@ - + - + Debug @@ -63,37 +63,78 @@ False ..\packages\Microsoft.Data.Services.Client.5.6.1\lib\net40\Microsoft.Data.Services.Client.dll - + + False ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.Hadoop.Client.dll - - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll + + False + 
..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll - - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + + False + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.dll + + + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.dll + + + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll + + + False + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.dll + + + False + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll - + + ..\packages\Microsoft.WindowsAzure.Management.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.dll + + + False ..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll - + + False ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.dll - + + False ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll + + False + ..\packages\Microsoft.WindowsAzure.Management.Storage.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll + False ..\packages\WindowsAzure.Storage.3.1.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll - + + False ..\packages\Newtonsoft.Json.6.0.2\lib\net45\Newtonsoft.Json.dll + + + + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Extensions.dll + + + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Primitives.dll + + False 
..\packages\System.Spatial.5.6.1\lib\net40\System.Spatial.dll @@ -106,8 +147,14 @@ + + + + + + @@ -137,14 +184,19 @@ + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - + + - + - + \ No newline at end of file diff --git a/DryadLinqTests/GroupByReduceTests.cs b/DryadLinqTests/GroupByReduceTests.cs new file mode 100644 index 0000000..e7c7d69 --- /dev/null +++ b/DryadLinqTests/GroupByReduceTests.cs @@ -0,0 +1,1003 @@ +using Microsoft.Research.DryadLinq; +using Microsoft.Research.Peloponnese.Storage; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Linq.Expressions; + +namespace DryadLinqTests +{ + public static class GroupByReduceTests + { + public static bool Decomposition_Average() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + double[] aggregates = pt1.GroupBy(x => x % 2).Select(g => g.Average()).ToArray(); + //int[] expected = new[] { 1 + 3 + 5 + 7 + 9 + 11, 2 + 4 + 6 + 8 + 10 + 12 }; + + ////note the order of the result elements is not guaranteed, so order them before testing + //int[] aggregatesOrdered = aggregates.OrderBy(x => x).ToArray(); + //int[] expectedOrdered = expected.OrderBy(x => x).ToArray(); + + //passed &= aggregatesOrdered.SequenceEqual(expectedOrdered); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool DistributiveResultSelector_1() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool 
passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + // this result selector satisfies "DistributiveOverConcat" + int[] aggregates = pt1.GroupBy(x => x % 2, (key, seq) => seq.Sum()).ToArray(); + int[] expected = new[] { 1 + 3 + 5 + 7 + 9 + 11, 2 + 4 + 6 + 8 + 10 + 12 }; + + //note the order of the result elements is not guaranteed, so order them before testing + int[] aggregatesOrdered = aggregates.OrderBy(x => x).ToArray(); + int[] expectedOrdered = expected.OrderBy(x => x).ToArray(); + + passed &= aggregatesOrdered.SequenceEqual(expectedOrdered); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool DistributiveSelect_1() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + // this result selector satisfies "DistributiveOverConcat" + int[] aggregates = pt1.GroupBy(x => x % 2).Select(group => group.Sum()).ToArray(); + int[] expected = new[] { 1 + 3 + 5 + 7 + 9 + 11, 2 + 4 + 6 + 8 + 10 + 12 }; + + //note the order of the result elements is not guaranteed, so order them before testing + int[] aggregatesOrdered = aggregates.OrderBy(x => x).ToArray(); + int[] expectedOrdered = expected.OrderBy(x => x).ToArray(); + + passed &= aggregatesOrdered.SequenceEqual(expectedOrdered); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool BuiltInCountIsDistributable() + { + var context = new 
DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets()); + IQueryable pt1 = simple.Select(x => x.First()); + + // Built in Count is Distributable as built-in logic knows to use Sum() as the combiner function. + // Count(a,b,c,d) = Sum(Count(a,b), Count(c,d)) + int[] aggregates = pt1.GroupBy(x => x % 2, (key, seq) => seq.Count()).ToArray(); + int[] expected = new[] { 6, 6 }; // six elements in each full group. + + //note the order of the result elements is not guaranteed, so order them before testing + int[] aggregatesOrdered = aggregates.OrderBy(x => x).ToArray(); + int[] expectedOrdered = expected.OrderBy(x => x).ToArray(); + + passed &= aggregatesOrdered.SequenceEqual(expectedOrdered); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool Bug12078_GroupByReduceWithResultSelectingAggregate() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable data = simple.Select(x => x.First()); + + double[] aggregates = data + .Select(x => (double)x) + .GroupBy(x => 0, (key, seq) => seq.Aggregate((double)0, (acc, item) => acc + item, val => val / 100)).ToArray(); + double[] expected = new[] { Enumerable.Range(1, 200).Sum() / 100.0 }; + + //note the order of the result elements is not guaranteed, so order them before testing + double[] aggregatesOrdered = aggregates.OrderBy(x => x).ToArray(); + double[] expectedOrdered = expected.OrderBy(x => x).ToArray(); + 
+ passed &= aggregatesOrdered.SequenceEqual(expectedOrdered); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + #region GroupByReduceWithCustomDecomposableFunction_DistributableCombiner + + [Decomposable(typeof(Decomposer_1))] + public static double DecomposableFunc(IEnumerable seq) + { + // hard to test with context system.. TestUtils.Assert(HpcLinq.LocalDebug, "This method should only be called during LocalDebug"); + return seq.Aggregate((double)0, (acc, item) => acc + item, val => val / 100); + } + public class Decomposer_1 : IDecomposable + { + public void Initialize(object state) { } + + public double Seed(double source) + { + return source; + } + + public double Accumulate(double a, double x) + { + return a + x; + } + + public double RecursiveAccumulate(double a, double x) + { + return a + x; + } + + public double FinalReduce(double a) + { + return a / 100; + } + } + + public static bool GroupByReduceWithCustomDecomposableFunction_DistributableCombiner() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable data = simple.Select(x => x.First()); + + double[] aggregates = data + .Select(x => (double)x) + .GroupBy(x => 0, (k, g) => DecomposableFunc(g)) + .ToArray(); + double[] expected = new[] { Enumerable.Range(1, 200).Sum() / 100.0 }; + + //note the order of the result elements is not guaranteed, so order them before testing + double[] aggregatesOrdered = aggregates.OrderBy(x => x).ToArray(); + double[] expectedOrdered = expected.OrderBy(x => x).ToArray(); + + passed &= aggregatesOrdered.SequenceEqual(expectedOrdered); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + #endregion 
GroupByReduceWithCustomDecomposableFunction_DistributableCombiner + + #region GroupByReduceWithCustomDecomposableFunction_DistributableCombiner_DifferingTypes + // Tests a fully decomposed function whose reducer changes types. + [Decomposable(typeof(Decomposer_2))] + public static string DecomposableFunc2(IEnumerable seq) + { + //TestUtils.Assert(HpcLinq.LocalDebug, "This method should only be called during LocalDebug"); + return seq.Aggregate((double)0, (acc, item) => acc + item, val => ("hello:" + val.ToString())); + } + public class Decomposer_2 : IDecomposable + { + public void Initialize(object state) { } + + public double Seed(double source) + { + return source; + } + + public double Accumulate(double a, double x) + { + return a + x; + } + + public double RecursiveAccumulate(double a, double x) + { + return a + x; + } + + public string FinalReduce(double a) + { + return ("hello:" + a.ToString()); + } + } + + public static bool GroupByReduceWithCustomDecomposableFunction_DistributableCombiner_DifferingTypes() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable data = simple.Select(x => x.First()); + + string[] aggregates = data + .Select(x => (double)x) + .GroupBy(x => 0, (key, seq) => DecomposableFunc2(seq)).ToArray(); + string[] expected = new[] { "hello:" + Enumerable.Range(1, 200).Sum() }; + + //note the order of the result elements is not guaranteed, so order them before testing + string[] aggregatesOrdered = aggregates.OrderBy(x => x).ToArray(); + string[] expectedOrdered = expected.OrderBy(x => x).ToArray(); + + passed &= aggregatesOrdered.SequenceEqual(expectedOrdered); + } + catch (DryadLinqException) + { + passed &= false; + } + return 
passed; + } + + #endregion GroupByReduceWithCustomDecomposableFunction_DistributableCombiner_DifferingTypes + + #region GroupByReduceWithCustomDecomposableFunction_DistributableCombiner_NoFinalizer + // Tests a decomposed function with no need for a particular reduce. + // The combiner changes type, and the recursive-combiner operators on the altered type + // The reducer just calls combiner again. + [Decomposable(typeof(Decomposer_3))] + public static string DecomposableFunc3(IEnumerable seq) + { + // TestUtils.Assert(HpcLinq.LocalDebug, "This method should only be called during LocalDebug"); + return seq.Aggregate("0", (acc, item) => (double.Parse(acc) + item).ToString()); + } + public class Decomposer_3 : IDecomposable + { + public void Initialize(object state) { } + + public string Seed(double source) + { + return source.ToString(); + } + + public string Accumulate(string a, double x) + { + return (double.Parse(a) + x).ToString(); + } + + public string RecursiveAccumulate(string a, string x) + { + return (double.Parse(a) + double.Parse(x)).ToString(); + } + + public string FinalReduce(string a) + { + return a; + } + } + + public static bool GroupByReduceWithCustomDecomposableFunction_DistributableCombiner_NoFinalizer() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable data = simple.Select(x => x.First()); + + string[] aggregates = data + .Select(x => (double)x) + .GroupBy(x => 0, (key, seq) => DecomposableFunc3(seq)).ToArray(); + string[] expected = new[] { Enumerable.Range(1, 200).Sum().ToString() }; + + //note the order of the result elements is not guaranteed, so order them before testing + string[] aggregatesOrdered = aggregates.OrderBy(x => 
x).ToArray(); + string[] expectedOrdered = expected.OrderBy(x => x).ToArray(); + + passed &= aggregatesOrdered.SequenceEqual(expectedOrdered); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + #endregion GroupByReduceWithCustomDecomposableFunction_DistributableCombiner_NoFinalizer + + #region GroupByReduceWithCustomDecomposableFunction_NonDistributableCombiner + // Tests simplified pattern where the Combiner is not recursively applied. + // Note: Func4 can be represented as a decomposable with distributive-combiner and a finalizer.. but here we choose not to. + // Because of the form of the Combiner, it is critical that it not be used recursively. + [Decomposable(typeof(Decomposer_4))] + public static double DecomposableFunc4(IEnumerable seq) + { + // TestUtils.Assert(HpcLinq.LocalDebug, "This method should only be called during LocalDebug"); + return seq.Aggregate(0.0, (acc, item) => acc + item, acc => acc / 100); + } + public class Decomposer_4 : IDecomposable + { + public void Initialize(object state) { } + + public double Seed(double source) + { + return source; + } + + public double Accumulate(double a, double x) + { + return a + x; + } + + public double RecursiveAccumulate(double a, double x) + { + return a + x; + } + + public double FinalReduce(double a) + { + return a / 100; + } + } + + public static bool GroupByReduceWithCustomDecomposableFunction_NonDistributableCombiner() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable data = simple.Select(x => x.First()); + + double[] aggregates = data + .Select(x => (double)x) + .GroupBy(x => 0, (key, seq) => DecomposableFunc4(seq)).ToArray(); + double[] expected = new[] 
{ Enumerable.Range(1, 200).Sum() / 100.0 }; + + //note the order of the result elements is not guaranteed, so order them before testing + double[] aggregatesOrdered = aggregates.OrderBy(x => x).ToArray(); + double[] expectedOrdered = expected.OrderBy(x => x).ToArray(); + + passed &= aggregatesOrdered.SequenceEqual(expectedOrdered); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + #endregion GroupByReduceWithCustomDecomposableFunction_NonDistributableCombiner + + public static bool GroupByReduce_BuiltIn_First() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable data = simple.Select(x => x.First()); + + int[] aggregates = data + .GroupBy(x => 0, (key, seq) => seq.First()) + .ToArray(); + + // the output of First can be the first item of either partition. 
+ passed &= aggregates.SequenceEqual(new[] { 1 }) || aggregates.SequenceEqual(new[] { 101 }); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool GroupByReduce_ResultSelector_ComplexNewExpression() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable data = simple.Select(x => x.First()); + + var aggregates = data.GroupBy(x => 0, (key, seq) => new KeyValuePair>(key, new KeyValuePair(seq.Average(), seq.Average()))).ToArray(); + + var expected = new KeyValuePair>[] { new KeyValuePair>(0, new KeyValuePair(100.5, 100.5)) }; + + passed &= aggregates.SequenceEqual(expected); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + #region GroupByReduce_ProgrammingManualExample + + public static bool GroupByReduce_ProgrammingManualExample() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + string filesetName = "DevUnitTest/0to999integers"; + Utils.DeleteFile(Config.accountName, Config.storageKey, Config.containerName, filesetName, true); + + IEnumerable> rawdata = new[] { Enumerable.Range(0, 334), Enumerable.Range(334, 333), Enumerable.Range(667, 333) }; + // ??? 
DscIngressHelpers.AsDryadQueryPartitions(context, rawdata, filesetName, DscCompressionScheme.None); + var data = context.FromStore(filesetName); + + var count = data.AsEnumerable().Count(); + var sum = data.AsEnumerable().Sum(); + var min = data.AsEnumerable().Min(); + var max = data.AsEnumerable().Max(); + var uniques = data.AsEnumerable().Distinct().Count(); + + //Console.WriteLine("DATA:: count:{0} uniques:{1} sum:{2}, min:{3}, max:{4}", count, uniques, sum, min, max); + + // ??? + //var results = data + // .GroupBy(x => x % 10, (key, seq) => new KeyValuePair(key, seq.MyAverage())) + // .OrderBy(y => y.Key) + // .ToArray(); + + ////foreach (var result in results) + //// Console.WriteLine("For group {0} the average is {1}", result.Key, result.Value); + + //passed &= (results.Count() == 10); + //passed &= (results[0].Key == 0); // "first element should be key=0"); + //passed &= (results[0].Value == 495); // "first element should be value=495 ie avg(0,10,20,..,990)"); + + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + [Decomposable(typeof(Decomposer_5))] + public static double MyAverage(this IEnumerable recordSequence) + { + int count = 0, sum = 0; + foreach (var r in recordSequence) + { + sum += r; + count++; + } + if (count == 0) throw new Exception("Can't average empty sequence"); + return (double)sum / (double)count; + } + + [Serializable] + public struct Partial + { + public int PartialSum; + public int PartialCount; + } + + public class Decomposer_5 : IDecomposable + { + public void Initialize(object state) { } + + public Partial Seed(int x) + { + Partial p = new Partial(); + p.PartialSum = x; + p.PartialCount = 1; + return p; + } + + public Partial Accumulate(Partial a, int x) + { + Partial p = new Partial(); + p.PartialSum = a.PartialSum + x; + p.PartialCount = a.PartialCount + 1; + return p; + } + + public Partial RecursiveAccumulate(Partial a, Partial x) + { + Partial p = new Partial(); + p.PartialSum = a.PartialSum + 
x.PartialSum; + p.PartialCount = a.PartialCount + x.PartialCount; + return p; + } + + public double FinalReduce(Partial a) + { + if (a.PartialCount == 0) throw new Exception("Can't average empty sequence"); + return (double)a.PartialSum / (double)a.PartialCount; + } + } + + #endregion GroupByReduce_ProgrammingManualExample + + + #region GroupByReduce_SameDecomposableUsedTwice + + public static bool GroupByReduce_SameDecomposableUsedTwice() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable pt1 = simple.Select(x => x.First()); + + var results = pt1.GroupBy(x => x % 2, (k, g) => MyFunc(k, DecomposableFunc5(g), DecomposableFunc5(g), g.Average())).ToArray(); + + //key0: count = 6, av = av(2,4,6,8,10,12) = 7 + //key1: count = 6, av = av(1,3,5,7,9,11) = 6 + + //local sort.. so that keys are in order. 
+ var results_sorted = results.OrderBy(x => x.Key).ToArray(); + + passed &= (results_sorted.Length == 2); // "wrong results" + + passed &= (results_sorted[0].Key == 0); // "wrong results" + passed &= (results_sorted[0].A == 6); // "wrong results" + passed &= (results_sorted[0].B == 6); // "wrong results" + passed &= (results_sorted[0].Av == 7.0); // "wrong results" + + passed &= (results_sorted[1].Key == 1); // "wrong results" + passed &= (results_sorted[1].A == 6); // "wrong results" + passed &= (results_sorted[1].B == 6); // "wrong results" + passed &= (results_sorted[1].Av == 6.0); // "wrong results" + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static MyStruct3 MyFunc(int key, int a, int b, double av) + { + return new MyStruct3(key, a, b, av); + } + [Decomposable(typeof(Decomposer_6))] + private static int DecomposableFunc5(IEnumerable g) + { + return g.Count(); + } + public class Decomposer_6 : IDecomposable + { + public void Initialize(object state) { } + + public int Seed(int source) { return 1; } + + public int Accumulate(int a, int x) + { + return a + 1; + } + + public int RecursiveAccumulate(int a, int x) + { + return a + x; + } + + public int FinalReduce(int a) + { + return a; + } + } + [Serializable] + public struct MyStruct3 + { + public int Key; + public int A; + public int B; + public double Av; + + public MyStruct3(int key, int a, int b, double av) + { + Key = key; A = a; B = b; Av = av; + } + } + + #endregion GroupByReduce_SameDecomposableUsedTwice + + #region API_Misuse + internal static bool GroupByReduce_APIMisuse() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + if (context.LocalDebug) + { + // "decomposition logic doesn't run in LocalDebug.. 
skipping"; + return true; + } + + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable pt1 = simple.Select(x => x.First()); + + // internal-visibility decomposable type should fail. + try + { + pt1.GroupBy(x => x, (k, g) => BadDecomposable1(g)).ToArray(); + passed &= false; // "exception should be thrown" + } + catch (DryadLinqException) + { + //??? passed &= (Ex.ErrorCode == DryadLinqErrorCode.DecomposerTypeMustBePublic); // "error code is wrong" + } + + // decomposable type doesn't implement IDecomposable or IDecomposableRecursive + try + { + pt1.GroupBy(x => x, (k, g) => BadDecomposable2(g)).ToArray(); + passed &= false; //"exception should be thrown"); + } + catch (DryadLinqException) + { + //??? passed &= (Ex.ErrorCode == DryadLinqErrorCode.DecomposerTypeDoesNotImplementInterface); + } + + // decomposable type implements more than one IDecomposable or IDecomposableRecursive + try + { + pt1.GroupBy(x => x, (k, g) => BadDecomposable3(g)).ToArray(); + passed &= false; + } + catch (DryadLinqException) + { + //??? passed &= (Ex.ErrorCode == DryadLinqErrorCode.DecomposerTypeImplementsTooManyInterfaces); + } + + // decomposable type doesn't have public default ctor + try + { + pt1.GroupBy(x => x, (k, g) => BadDecomposable4(g)).ToArray(); + passed &= false; + } + catch (DryadLinqException) + { + //??? passed &= (Ex.ErrorCode == DryadLinqErrorCode.DecomposerTypeDoesNotHavePublicDefaultCtor); + } + + // decomposable type input type doesn't match + try + { + pt1.GroupBy(x => x, (k, g) => BadDecomposable5(g)).ToArray(); + passed &= false; + } + catch (DryadLinqException) + { + //??? 
passed &= (Ex.ErrorCode == DryadLinqErrorCode.DecomposerTypesDoNotMatch); + } + + // decomposable type output type doesn't match + try + { + pt1.GroupBy(x => x, (k, g) => BadDecomposable6(g)).ToArray(); + passed &= false; + } + catch (DryadLinqException) + { + //??? passed &= (Ex.ErrorCode == DryadLinqErrorCode.DecomposerTypesDoNotMatch); + } + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + [Decomposable(typeof(BadDecomposerType1))] + private static int BadDecomposable1(IEnumerable g) + { + throw new NotImplementedException(); + } + internal class BadDecomposerType1 : IDecomposable + { + public void Initialize(object state) { } + public int Seed(int x) { return x; } + public int Accumulate(int a, int x) { throw new NotImplementedException(); } + public int RecursiveAccumulate(int a, int x) { throw new NotImplementedException(); } + public int FinalReduce(int a) { throw new NotImplementedException(); } + } + [Decomposable(typeof(BadDecomposerType2))] + private static int BadDecomposable2(IEnumerable g) + { + throw new NotImplementedException(); + } + public class BadDecomposerType2 + { + } + [Decomposable(typeof(BadDecomposerType3))] + private static int BadDecomposable3(IEnumerable g) + { + throw new NotImplementedException(); + } + public class BadDecomposerType3 : IDecomposable + { + public void Initialize(object state) { } + public int Seed(int x) { return x; } + public int Accumulate(int a, int x) { throw new NotImplementedException(); } + public int RecursiveAccumulate(int a, int x) { throw new NotImplementedException(); } + public int FinalReduce(int a) { throw new NotImplementedException(); } + } + [Decomposable(typeof(BadDecomposerType4))] + private static int BadDecomposable4(IEnumerable g) + { + throw new NotImplementedException(); + } + public class BadDecomposerType4 : IDecomposable + { + internal BadDecomposerType4() { } + public BadDecomposerType4(int x) { } + public void Initialize(object state) { } + public int 
Seed(int x) { return x; } + public int Accumulate(int a, int x) { throw new NotImplementedException(); } + public int RecursiveAccumulate(int a, int x) { throw new NotImplementedException(); } + public int FinalReduce(int a) { throw new NotImplementedException(); } + } + [Decomposable(typeof(BadDecomposerType5))] + private static int BadDecomposable5(IEnumerable g) + { + throw new NotImplementedException(); + } + public class BadDecomposerType5 : IDecomposable + { + public void Initialize(object state) { } + public int Seed(double s) { throw new NotImplementedException(); } + public int Accumulate(int a, double x) { throw new NotImplementedException(); } + public int RecursiveAccumulate(int a, int x) { throw new NotImplementedException(); } + public int FinalReduce(int a) { throw new NotImplementedException(); } + } + [Decomposable(typeof(BadDecomposerType6))] + private static int BadDecomposable6(IEnumerable g) + { + throw new NotImplementedException(); + } + public class BadDecomposerType6 : IDecomposable + { + public void Initialize(object state) { } + public int Seed(int s) { throw new NotImplementedException(); } + public int Accumulate(int a, int x) { throw new NotImplementedException(); } + public int RecursiveAccumulate(int a, int x) { throw new NotImplementedException(); } + public double FinalReduce(int a) { throw new NotImplementedException(); } + } + + #endregion API_Misuse + + public static bool GroupByReduce_ListInitializerReducer() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable pt1 = simple.Select(x => x.First()); + + var results = pt1.GroupBy(x => x % 2, (k, g) => new List() { k, g.Count(), g.Sum() }).ToArray(); + + //local sort.. 
so that keys are in order. + var resultsSorted = results.OrderBy(list => list[0]).ToArray(); + + //key0: count = 6, sum = 42 + //key1: count = 6, sum = 36 + + passed &= (resultsSorted[0][0] == 0); // "incorrect results.1" + passed &= (resultsSorted[0][1] == 6); // "incorrect results.2" + passed &= (resultsSorted[0][2] == 42); // "incorrect results.3" + + passed &= (resultsSorted[1][0] == 1); // "incorrect results.4" + passed &= (resultsSorted[1][1] == 6); // "incorrect results.5" + passed &= (resultsSorted[1][2] == 36); // "incorrect results.6" + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool GroupByReduce_CustomListInitializerReducer() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable pt1 = simple.Select(x => x.First()); + + var results = pt1.GroupBy(x => x % 2, (k, g) => new MultiParamInitializerClass() { + {k, g.Count(), g.Sum()} , // one item, comprising three components + }).ToArray(); + //local sort.. so that keys are in order. 
+ var resultsSorted = results.OrderBy(list => list.Key).ToArray(); + + //key0: count = 6, sum = 42 + //key1: count = 6, sum = 36 + + passed &= (resultsSorted[0].Key == 0); // "incorrect results.1" + passed &= (resultsSorted[0].Count() == 6); // "incorrect results.2" + passed &= (resultsSorted[0].Sum() == 42); // "incorrect results.3" + + passed &= (resultsSorted[1].Key == 1); // "incorrect results.4" + passed &= (resultsSorted[1].Count() == 6); // "incorrect results.5" + passed &= (resultsSorted[1].Sum() == 36); // "incorrect results.6" + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + // note: must be IEnumerable<> to be allowed to participate in list-initializer syntax. + // we are cheating here and only supporting one "add" call, just as an example. + [Serializable] + public class MultiParamInitializerClass : IEnumerable + { + public int Key; + public int Sum; + public int Count; + + public void Add(int key, int count, int sum) + { + Key = key; + Count = count; + Sum = sum; + } + public IEnumerator GetEnumerator() + { + yield return Key; + yield return Count; + yield return Sum; + } + System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + } + + public static bool GroupByReduce_BitwiseNegationOperator() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable pt1 = simple.Select(x => x.First()); + + var results = pt1.GroupBy(x => x % 2, (k, g) => new KeyValuePair(k, ~g.Sum())).ToArray(); + + //local sort.. so that keys are in order. 
+ var resultsSorted = results.OrderBy(list => list.Key).ToArray(); + + //key0: count = 6, sum = 42 + //key1: count = 6, sum = 36 + + passed &= (resultsSorted[0].Key == 0); // "incorrect results.1" + passed &= (resultsSorted[0].Value == ~42); // "incorrect results.2" + + passed &= (resultsSorted[1].Key == 1); // "incorrect results.3" + passed &= (resultsSorted[1].Value == ~36); // "incorrect results.4" + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + + public static bool template() + { + var context = new DryadLinqContext(Config.cluster); + context.LocalExecution = false; + bool passed = true; + try + { + IQueryable input = context.FromStore(AzureUtils.ToAzureUri(Config.accountName, Config.containerName, + "unittest/inputdata/SimpleFile.txt")); + + IQueryable> simple = input.Apply(x => DataGenerator.CreateGroupByReduceDataSet()); + IQueryable data = simple.Select(x => x.First()); + + //passed &= aggregatesOrdered.SequenceEqual(expectedOrdered); + } + catch (DryadLinqException) + { + passed &= false; + } + return passed; + } + } +} diff --git a/DryadLinqTests/Program.cs b/DryadLinqTests/Program.cs index c6813bd..c3b6dc9 100644 --- a/DryadLinqTests/Program.cs +++ b/DryadLinqTests/Program.cs @@ -21,6 +21,7 @@ using System; using System.Collections.Generic; using System.Linq; using System.Text; +using System.IO; using System.Threading.Tasks; using Microsoft.Research.DryadLinq; @@ -32,64 +33,32 @@ namespace DryadLinqTests { public static void Main(string[] args) { - //Test1(args); - //Test2(args); - //Test3(args); - //Test4(args); - //Test5(args); - } + //BasicAPITests.ToStoreThrowsForNonQuery(); + //BasicAPITests.ToStoreGetEnumeratorThrows(); + //BasicAPITests.GetEnumeratorNonToStoreTerminated(); + //BasicAPITests.ToStoreSubmitGetEnumerator(); + //BasicAPITests.SubmitNonToStoreTerminated(); + //BasicAPITests.MaterializeToStoreTerminated(); + //BasicAPITests.MaterializeNonToStoreTerminated(); + //BasicAPITests.EnumeratePlainData(); - 
public static void Test1(string[] args) - { - DryadLinqContext context = new DryadLinqContext(1, "partfile"); - //context.PartitionUncPath = "DryadLinqTemp/PartFiles"; - var input = context.FromStore("partfile:///d:/DryadLinqTemp/PartFiles/foo.pt"); - var lines = input.Where(x => x.Line.Contains("white")); - var result = lines.ToStore("partfile://svc-yuanbyu-3/DryadLinqTemp/PartFiles/res1.pt", true); - result.SubmitAndWait(); - } + //BasicAPITests.RepeatSubmit(); + //BasicAPITests.RepeatMaterialize(); + //BasicAPITests.MaterializeMentionsSameQueryTwice(); + //BasicAPITests.QueryOnDataBackedDLQ(); + //BasicAPITests.Bug11781_CountandFirstOrDefault(); + //BasicAPITests.Bug11782_Aggregate(); + //BasicAPITests.Bug11782_LowLevelQueryableManipulation(); + //BasicAPITests.Bug11638_LongWhere(); + //BasicAPITests.AssumeRangePartition(); + //BasicAPITests.Bug11638_LongMethods(); + //BasicAPITests.ContextConfigIsReadOnly(); + //BasicAPITests.ToggleSpeculativeDuplication(); + //BasicAPITests.Bug15068_ConfigResourcesAPI(); + //BasicAPITests.Bug_16341_SubmitThrowsForDifferentContexts(); + //BasicAPITests.Bug14449_ContextShouldExposeVersionIDs(); + //BasicAPITests.Bug_16341_VariousTestsForSubmit(); - public static void Test2(string[] args) - { - DryadLinqContext context = new DryadLinqContext(1, "partfile:///d:/DryadLinqTemp/PartFiles"); - var input = context.FromStore("partfile:///d:/DryadLinqTemp/PartFiles/foo.pt"); - var q1 = input.Where(x => x.Line.Contains("white")); - var q2 = input.Where(x => x.Line.Contains("the")); - var res1 = q1.ToStore("partfile:///d:/DryadLinqTemp/PartFiles/res1.pt", true); - var res2 = q2.ToStore("partfile:///d:/DryadLinqTemp/PartFiles/res2.pt", true); - DryadLinqQueryable.SubmitAndWait(res1, res2); - } - - public static void Test3(string[] args) - { - DryadLinqContext context = new DryadLinqContext(1, "partfile:///d:/DryadLinqTemp/PartFiles"); - var input = context.FromStore("partfile:///d:/DryadLinqTemp/PartFiles/foo.pt"); - var words = 
input.SelectMany(x => x.Line.Split(' ')); - var groups = words.GroupBy(x => x); - var counts = groups.Select(x => new KeyValuePair(x.Key, x.Count())); - var toOutput = counts.Select(x => new LineRecord(String.Format("{0}: {1}", x.Key, x.Value))); - var result = toOutput.ToStore("partfile:///d:/DryadLinqTemp/PartFiles/res2.pt", true); - result.SubmitAndWait(); - } - - public static void Test4(string[] args) - { - DryadLinqContext context = new DryadLinqContext("svc-d2-01"); - var input = context.FromStore("hdfs://svc-d2-01:8033/user/misard/foo.txt"); - var lines = input.Where(x => x.Line.Contains("white")); - var result = lines.ToStore("hdfs://svc-d2-01:8033/user/yuanbyu/foo.txt", true); - result.SubmitAndWait(); - } - - public static void Test5(string[] args) - { - DryadLinqContext context = new DryadLinqContext(1); - Uri dataUri = AzureUtils.ToAzureUri("msrsvc", "I4JPlk0bZ6YWypg+RJamyq0us1b+kCcuoeKlPhfiHTcVW7P4xvuzURvlRShSo1O3UDhcL2LiY4kMaarD+p1lKg==", "test", "testwrite"); - IEnumerable lines = DataProvider.ReadData(context, dataUri); - foreach (var x in lines) - { - Console.WriteLine(x); - } } } } diff --git a/DryadLinqTests/Properties/AssemblyInfo.cs b/DryadLinqTests/Properties/AssemblyInfo.cs index effbff7..db85f70 100644 --- a/DryadLinqTests/Properties/AssemblyInfo.cs +++ b/DryadLinqTests/Properties/AssemblyInfo.cs @@ -51,5 +51,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.1.0.0")] -[assembly: AssemblyFileVersion("0.1.0.0")] +[assembly: AssemblyVersion("0.1.2.0")] +[assembly: AssemblyFileVersion("0.1.2.0")] diff --git a/DryadLinqTests/RangePartitionAPICoverageTests.cs b/DryadLinqTests/RangePartitionAPICoverageTests.cs new file mode 100644 index 0000000..3581390 --- /dev/null +++ b/DryadLinqTests/RangePartitionAPICoverageTests.cs @@ -0,0 +1,12 @@ +using System; +using 
System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace DryadLinqTests +{ + public class RangePartitionAPICoverageTests + { + } +} diff --git a/DryadLinqTests/Utils.cs b/DryadLinqTests/Utils.cs new file mode 100644 index 0000000..5aec8aa --- /dev/null +++ b/DryadLinqTests/Utils.cs @@ -0,0 +1,157 @@ +using Microsoft.Research.DryadLinq; +using Microsoft.WindowsAzure.Storage; +using Microsoft.WindowsAzure.Storage.Blob; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; + + +namespace DryadLinqTests +{ + public static class Config + { + public static string accountName = @"MyAccountName"; + public static string storageKey = @"MyStorageKey"; + public static string containerName = @"MyContainerName"; + public static string cluster = "MyCcluster"; + } + + public class DataGenerator + { + public DataGenerator() + { + } + + public static IEnumerable> CreateSimpleFileSets() + { + IEnumerable> data = new int[][] + { + new[] { 1, 2, 3, 4 }, + new[] { 5, 6, 7, 8 }, + new[] { 9, 10, 11, 12 }, + }; + return data; + } + + public static IEnumerable> CreateGroupByReduceDataSet() + { + // we need quite a few elements to ensure the combiner will be activated in Stage#1 groupBy. + // 33 elements per partition should suffice, but 100 per partition is safer. 
+ IEnumerable> data = new int[][] + { + Enumerable.Range(1,100).ToArray(), + Enumerable.Range(101,100).ToArray(), + }; + return data; + } + + + } + + public class Utils + { + public static bool DeleteFile(string accountName, string accountKey, string containerName, string fileName, bool delSubDirs) + { + try + { + CloudStorageAccount storageAccount = CloudStorageAccount.Parse("DefaultEndpointsProtocol=http;AccountName=" + accountName + ";AccountKey=" + accountKey); + CloudBlobClient blobClient = storageAccount.CreateCloudBlobClient(); + CloudBlobContainer container = blobClient.GetContainerReference(containerName); + container.CreateIfNotExists(); + BlobContainerPermissions containerPermissions = new BlobContainerPermissions(); + containerPermissions.PublicAccess = BlobContainerPublicAccessType.Blob; + container.SetPermissions(containerPermissions); + + if (false == delSubDirs) + { + CloudBlockBlob remoteFile = container.GetBlockBlobReference(fileName); + remoteFile.DeleteIfExists(); + } + + if (true == delSubDirs) + { + foreach (IListBlobItem item in container.ListBlobs(fileName, true)) + { + CloudBlockBlob blob = (CloudBlockBlob)item; + blob.DeleteIfExists(); + } + } + } + catch (Exception) + { + return false; + } + return true; + } + public static bool FileExists(string accountName, string accountKey, string containerName, string fileName) + { + try + { + CloudStorageAccount storageAccount = CloudStorageAccount.Parse("DefaultEndpointsProtocol=http;AccountName=" + accountName + ";AccountKey=" + accountKey); + CloudBlobClient blobClient = storageAccount.CreateCloudBlobClient(); + CloudBlobContainer container = blobClient.GetContainerReference(containerName); + container.CreateIfNotExists(); + BlobContainerPermissions containerPermissions = new BlobContainerPermissions(); + containerPermissions.PublicAccess = BlobContainerPublicAccessType.Blob; + container.SetPermissions(containerPermissions); + + CloudBlockBlob remoteFile = 
container.GetBlockBlobReference(fileName); + if (!remoteFile.Exists()) + return false; + } + catch (Exception) + { + return false; + } + return true; + } + + internal static DryadLinqContext MakeBasicConfig(string cluster) //??? + { + var context = new DryadLinqContext(cluster); + try + { + context.JobFriendlyName = "DryadLinq_DevUnitTests"; + context.CompileForVertexDebugging = true; + context.JobEnvironmentVariables.Add("DummyEnvVar", "hello"); //note: this is consumed by a unit-test. + + if (File.Exists("Microsoft.Hpc.Linq.pdb")) // TODO: fix references + { + context.ResourcesToAdd.Add("Microsoft.Hpc.Linq.pdb"); + } + + if (File.Exists("Microsoft.Hpc.Dsc.Client.pdb")) // TODO: fix references + { + context.ResourcesToAdd.Add("Microsoft.Hpc.Dsc.Client.pdb"); + } + + // To prevent job from running forever, and blocking other test + context.JobRuntimeLimit = (int)TimeSpan.FromMinutes(30).TotalSeconds; + + + //config.AllowConcurrentUserDelegatesInSingleProcess = false; + + // If we are on Azure, we have to set the nodeGroup to "NodeRole" so that the default of "ComputeNodes" is not used + // This fixes "FromEnumerableTests" on Azure which queries the active node-group. 
+ // Note also, the headnode for an azure deployment defaults to "HPCCluster" (at least from James' script) + int onAzureInt = 0; + string onAzureString = Environment.GetEnvironmentVariable("CCP_SCHEDULERONAZURE"); + if (onAzureString != null) + { + int.TryParse(onAzureString, out onAzureInt); + } + + if (onAzureInt == 1) + { + context.NodeGroup = "NodeRole"; + } + + } + catch (DryadLinqException) + { + } + return context; + } + } +} diff --git a/DryadLinqTests/Validate.cs b/DryadLinqTests/Validate.cs new file mode 100644 index 0000000..f14904f --- /dev/null +++ b/DryadLinqTests/Validate.cs @@ -0,0 +1,267 @@ +///------------------------------------------------------------------------------------------------- +// file: Validate.cs +// +// summary: Implements the validate class +///------------------------------------------------------------------------------------------------- + +using System; +using System.Collections.Generic; +using System.Linq; + +namespace BenchmarkFramework { + + ///------------------------------------------------------------------------------------------------- + /// Validation utils + ///------------------------------------------------------------------------------------------------- + + public class Validate { + + public static void + Check( + IEnumerable[] ss, + IComparer comparer = null, + bool sort = true, + bool verbose = false, + IComparer sortcomparer = null + ) { + + if(ss.Length == 0) return; + + if(comparer == null) { + comparer = Comparer.Default; + if(comparer == null) { + throw new ArgumentNullException("Can't not be null."); + } + } + if(sortcomparer == null) + sortcomparer = comparer; + + T[][] aa = new T[ss.Length][]; + for(int i = 0; i < aa.Length; i++) { + aa[i] = ss[i].ToArray(); + if(sort) Array.Sort(aa[i], sortcomparer); + } + int len = aa[0].Length; + for(int i = 1; i < aa.Length; i++) { + if(aa[i].Length != len) { + throw new Exception("Wrong number of elements."); + } + } + for(int i = 0; i < len; i++) { + T elem = 
aa[0][i]; + for(int j = 1; j < aa.Length; j++) { + if(verbose) { + //TestOutput.WriteLine("Comparing {0} to {1}", elem.ToString(), aa[j][i].ToString()); + } + if(comparer.Compare(elem, aa[j][i]) != 0) { + throw new Exception("Elements failed to match: " + elem + " != " + aa[j][i]); + } + } + } + } + + //public static void + //GroupCheck( + // IEnumerable>[] ss, + // IComparer kComparer = null, + // IComparer vComparer = null + // ) { + + // if(ss.Length == 0) return; + + // if(kComparer == null) { + // kComparer = Comparer.Default; + // if(kComparer == null) { + // throw new ArgumentNullException("Can't not be null."); + // } + // } + // if(vComparer == null) { + // vComparer = Comparer.Default; + // if(vComparer == null) { + // throw new ArgumentNullException("Can't not be null."); + // } + // } + // IGrouping[][] aa = new IGrouping[ss.Length][]; + // for(int i = 0; i < aa.Length; i++) { + // aa[i] = ss[i].ToArray(); + // K[] keys = aa[i].Select(x => x.Key).ToArray(); + // Array.Sort(keys, aa[i], kComparer); + // } + // int len = aa[0].Length; + // for(int i = 1; i < aa.Length; i++) { + // if(aa[i].Length != len) { + // throw new Exception("Wrong number of elements."); + // } + // } + // for(int i = 0; i < len; i++) { + // IEnumerable elem = aa[0][i]; + // for(int j = 1; j < aa.Length; j++) { + // Check(new IEnumerable[] { elem, aa[j][i] }, vComparer); + // } + // } + //} + } + + /* + ///------------------------------------------------------------------------------------------------- + /// Tolerant float comparer. Floating point differences between + /// GPU and CPU cause the default comparer to fail sometimes even when + /// the result is correct. Use this comparer to introduce some tolerance + /// for this + /// + /// + /// Crossbac, 2/19/2013. 
+ ///------------------------------------------------------------------------------------------------- + + public class TolerantDoubleComparer : IComparer { + Double EPSILON; + public TolerantDoubleComparer(Double _epsilon = 0.000001f) { + EPSILON = _epsilon; + } + public int Compare(Double a, Double b) { + Double delta = a - b; + if(Math.Abs(delta) <= EPSILON) + return 0; + return delta < 0.0f ? -1 : 1; + } + } + + ///------------------------------------------------------------------------------------------------- + /// Tolerant float comparer. Floating point differences between + /// GPU and CPU cause the default comparer to fail sometimes even when + /// the result is correct. Use this comparer to introduce some tolerance + /// for this + /// + /// + /// Crossbac, 2/19/2013. + ///------------------------------------------------------------------------------------------------- + + public class TolerantFloatComparer : IComparer { + float EPSILON; + public TolerantFloatComparer(float _epsilon = 0.000001f) { + EPSILON = _epsilon; + } + public int Compare(float a, float b) { + float delta = a - b; + if(Math.Abs(delta) <= EPSILON) + return 0; + return delta < 0.0f ? -1 : 1; + } + } + + ///------------------------------------------------------------------------------------------------- + /// Tolerant float comparer. Floating point differences between + /// GPU and CPU cause the default comparer to fail sometimes even when + /// the result is correct. Use this comparer to introduce some tolerance + /// for this + /// + /// + /// Crossbac, 2/19/2013. 
+ ///------------------------------------------------------------------------------------------------- + + public class TolerantVectorComparer : IComparer { + float EPSILON; + public TolerantVectorComparer(float _epsilon = 0.0001f) { + EPSILON = _epsilon; + } + public int Compare(Vector a, Vector b) { + for(int i = 0; i < a.m_elems.Length; i++) { + float delta = a.m_elems[i] - b.m_elems[i]; + if(Math.Abs(delta) > EPSILON) + return delta < 0.0f ? -1 : 1; + } + return 0; + } + } + + ///------------------------------------------------------------------------------------------------- + /// Interface for epsilon comparable single. + /// + /// Crossbac, 1/16/2014. + /// + /// Generic type parameter. + ///------------------------------------------------------------------------------------------------- + + public interface IEpsilonComparableSingle { + int EpsilonCompare(T a, T b, float epsilon); + } + + ///------------------------------------------------------------------------------------------------- + /// Interface for epsilon comparable double. + /// + /// Crossbac, 1/16/2014. + /// + /// Generic type parameter. + ///------------------------------------------------------------------------------------------------- + + public interface IEpsilonComparableDouble { + int EpsilonCompare(T a, T b, double epsilon); + } + + + ///------------------------------------------------------------------------------------------------- + /// Tolerant float comparer. Floating point differences between + /// GPU and CPU cause the default comparer to fail sometimes even when + /// the result is correct. Use this comparer to introduce some tolerance + /// for this + /// + /// + /// Crossbac, 2/19/2013. 
+ ///------------------------------------------------------------------------------------------------- + + public class EpsilonComparer : IComparer where T : IEpsilonComparableSingle { + float EPSILON; + public EpsilonComparer(float _epsilon = 0.0001f) { + EPSILON = _epsilon; + } + public int Compare(T a, T b) { + return a.EpsilonCompare(a, b, EPSILON); + } + } + + + ///------------------------------------------------------------------------------------------------- + /// Tolerant float comparer for images. Floating point differences between + /// GPU and CPU cause the default comparer to fail sometimes even when + /// the result is correct. Use this comparer to introduce some tolerance + /// for this + /// + /// + /// Crossbac, 2/19/2013. + ///------------------------------------------------------------------------------------------------- + + public class TolerantImageComparer : IComparer { + float EPSILON; + public TolerantImageComparer(float _epsilon = 0.0001f) { + EPSILON = _epsilon; + } + public int Compare(Image a, Image b) { + for(int i = 0; i < a.m_elems.Length; i++) { + float delta = a.m_elems[i] - b.m_elems[i]; + if(Math.Abs(delta) > EPSILON) + return delta < 0.0f ? -1 : 1; + } + return 0; + } + } + + /// + /// Compare two instances of Pair. + /// Implement for concrete type as Pair cannot straighforwardly implement IComparable. + /// + /// + /// jcurrey, 3/11/2013. 
+ public class PairIntIntComparer : IComparer> { + public int Compare(Pair a, Pair b) { + int keyComparison = a.Key.CompareTo(b.Key); + if(keyComparison == 0) { + return a.Value.CompareTo(b.Value); + } else { + return keyComparison; + } + } + } + +*/ + +} diff --git a/DryadLinqTests/packages.config b/DryadLinqTests/packages.config index 6cc3081..bd29d67 100644 --- a/DryadLinqTests/packages.config +++ b/DryadLinqTests/packages.config @@ -1,13 +1,21 @@ - + + + + - + + + + + + - + \ No newline at end of file diff --git a/DryadVertex/VertexHost/system/channel/channel.vcxproj b/DryadVertex/VertexHost/system/channel/channel.vcxproj index 763d09f..d68bcfd 100644 --- a/DryadVertex/VertexHost/system/channel/channel.vcxproj +++ b/DryadVertex/VertexHost/system/channel/channel.vcxproj @@ -1,6 +1,6 @@ - + - + Debug @@ -149,13 +149,13 @@ - + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - + + - + \ No newline at end of file diff --git a/DryadVertex/VertexHost/system/channel/packages.config b/DryadVertex/VertexHost/system/channel/packages.config index 7868d60..cb331ea 100644 --- a/DryadVertex/VertexHost/system/channel/packages.config +++ b/DryadVertex/VertexHost/system/channel/packages.config @@ -1,4 +1,4 @@ - + - - + + \ No newline at end of file diff --git a/DryadVertex/VertexHost/system/dprocess/dprocess.vcxproj b/DryadVertex/VertexHost/system/dprocess/dprocess.vcxproj index dbe63b4..67ff44b 100644 --- a/DryadVertex/VertexHost/system/dprocess/dprocess.vcxproj +++ b/DryadVertex/VertexHost/system/dprocess/dprocess.vcxproj @@ -1,6 +1,6 @@ - + - + Debug @@ -123,13 +123,13 @@ - + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
- - + + - + \ No newline at end of file diff --git a/DryadVertex/VertexHost/system/dprocess/packages.config b/DryadVertex/VertexHost/system/dprocess/packages.config index 7868d60..cb331ea 100644 --- a/DryadVertex/VertexHost/system/dprocess/packages.config +++ b/DryadVertex/VertexHost/system/dprocess/packages.config @@ -1,4 +1,4 @@ - + - - + + \ No newline at end of file diff --git a/DryadVertex/VertexHost/system/managedchannel/Properties/AssemblyInfo.cs b/DryadVertex/VertexHost/system/managedchannel/Properties/AssemblyInfo.cs index 2b7c6ee..bb0b4db 100644 --- a/DryadVertex/VertexHost/system/managedchannel/Properties/AssemblyInfo.cs +++ b/DryadVertex/VertexHost/system/managedchannel/Properties/AssemblyInfo.cs @@ -51,5 +51,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.1.0.0")] -[assembly: AssemblyFileVersion("0.1.0.0")] +[assembly: AssemblyVersion("0.1.2.0")] +[assembly: AssemblyFileVersion("0.1.2.0")] diff --git a/DryadVertex/VertexHost/system/managedchannel/managedchannel.csproj b/DryadVertex/VertexHost/system/managedchannel/managedchannel.csproj index 4a6e56b..fc6d77f 100644 --- a/DryadVertex/VertexHost/system/managedchannel/managedchannel.csproj +++ b/DryadVertex/VertexHost/system/managedchannel/managedchannel.csproj @@ -1,6 +1,6 @@ - + - + Debug @@ -53,15 +53,39 @@ False - ..\..\..\..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll + ..\..\..\..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll - + False - ..\..\..\..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + ..\..\..\..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + + + False + 
..\..\..\..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.dll + + + False + ..\..\..\..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.dll + + + False + ..\..\..\..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll + + + False + ..\..\..\..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.dll + + + False + ..\..\..\..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll ..\..\..\..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll + + False + ..\..\..\..\packages\Microsoft.WindowsAzure.Management.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.dll + False ..\..\..\..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll @@ -74,6 +98,10 @@ False ..\..\..\..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll + + False + ..\..\..\..\packages\Microsoft.WindowsAzure.Management.Storage.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll + False ..\..\..\..\packages\WindowsAzure.Storage.3.1.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll @@ -85,6 +113,17 @@ + + + + False + ..\..\..\..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Extensions.dll + + + False + ..\..\..\..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Primitives.dll + + False ..\..\..\..\packages\System.Spatial.5.6.1\lib\net40\System.Spatial.dll @@ -112,14 +151,19 @@ + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
- - + + - + - + \ No newline at end of file diff --git a/DryadVertex/VertexHost/system/managedchannel/packages.config b/DryadVertex/VertexHost/system/managedchannel/packages.config index 6cc3081..bd29d67 100644 --- a/DryadVertex/VertexHost/system/managedchannel/packages.config +++ b/DryadVertex/VertexHost/system/managedchannel/packages.config @@ -1,13 +1,21 @@ - + + + + - + + + + + + - + \ No newline at end of file diff --git a/DryadVertex/VertexHost/vertex/vertexHost/VertexHost.vcxproj b/DryadVertex/VertexHost/vertex/vertexHost/VertexHost.vcxproj index 6e7a3ca..bef34dd 100644 --- a/DryadVertex/VertexHost/vertex/vertexHost/VertexHost.vcxproj +++ b/DryadVertex/VertexHost/vertex/vertexHost/VertexHost.vcxproj @@ -1,6 +1,6 @@ - + - + Debug @@ -124,13 +124,13 @@ - + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - + + - + \ No newline at end of file diff --git a/DryadVertex/VertexHost/vertex/vertexHost/packages.config b/DryadVertex/VertexHost/vertex/vertexHost/packages.config index 7868d60..cb331ea 100644 --- a/DryadVertex/VertexHost/vertex/vertexHost/packages.config +++ b/DryadVertex/VertexHost/vertex/vertexHost/packages.config @@ -1,4 +1,4 @@ - + - - + + \ No newline at end of file diff --git a/GraphManager/GraphManager.vcxproj b/GraphManager/GraphManager.vcxproj index 252ea35..5304f03 100644 --- a/GraphManager/GraphManager.vcxproj +++ b/GraphManager/GraphManager.vcxproj @@ -1,6 +1,6 @@ - + - + Debug @@ -223,13 +223,13 @@ - + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
- - + + - + \ No newline at end of file diff --git a/GraphManager/GraphManager.vcxproj.filters b/GraphManager/GraphManager.vcxproj.filters index 38cadcf..5c493af 100644 --- a/GraphManager/GraphManager.vcxproj.filters +++ b/GraphManager/GraphManager.vcxproj.filters @@ -5,10 +5,6 @@ {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hpp;hxx;hm;inl;inc;xsd - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx diff --git a/GraphManager/packages.config b/GraphManager/packages.config index 3674cc4..cb331ea 100644 --- a/GraphManager/packages.config +++ b/GraphManager/packages.config @@ -1,4 +1,4 @@  - + \ No newline at end of file diff --git a/JobBrowser/JOM/ClusterConfiguration.cs b/JobBrowser/JOM/ClusterConfiguration.cs index 4d1d1c9..2b897fa 100644 --- a/JobBrowser/JOM/ClusterConfiguration.cs +++ b/JobBrowser/JOM/ClusterConfiguration.cs @@ -19,30 +19,31 @@ limitations under the License. */ -using Microsoft.Research.Calypso.Tools; +using System.Security.Cryptography.X509Certificates; +using System.Xml.Linq; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; -using System.Text.RegularExpressions; using System.Net; -using System.Diagnostics; using Microsoft.Research.Peloponnese.Storage; +using Microsoft.Research.Tools; +using Microsoft.WindowsAzure.Management.HDInsight; -namespace Microsoft.Research.Calypso.JobObjectModel +namespace Microsoft.Research.JobObjectModel { /// /// Error during conversation with cluster. /// - public sealed class CalypsoClusterException : Exception + public sealed class ClusterException : Exception { /// /// Create an exception about handling a cluster. /// /// Exception message. 
- public CalypsoClusterException(string message) : base(message) { } + public ClusterException(string message) : base(message) { } } /// @@ -606,13 +607,22 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// public abstract string Initialize(); + /// + /// Enumerate all clusters this user is subscribed to. + /// + /// A list of clusters. + public static IEnumerable EnumerateSubscribedClusters() + { + return AzureDfsClusterConfiguration.EnumerateAzureDfsSubscribedClusters(); + } + /// /// Create serialization data structure for this configuration. /// /// The corresponding serialization. public ClusterConfigurationSerialization ExtractData() { - ClusterConfigurationSerialization result = new ClusterConfigurationSerialization() + ClusterConfigurationSerialization result = new ClusterConfigurationSerialization { Type = this.TypeOfCluster, Name = this.Name, @@ -925,7 +935,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel IClusterResidentObject dir = this.ProcessWorkDirectory(job.ManagerProcessGuid, true, job.Machine, job); // immutable var matchingfiles = dir.GetFilesAndFolders("DryadLinqProgram__*.xml").ToList(); if (matchingfiles.Count() != 1) - throw new CalypsoClusterException("Could not find query plan file; got " + matchingfiles.Count() + " possible matches"); + throw new ClusterException("Could not find query plan file; got " + matchingfiles.Count() + " possible matches"); IClusterResidentObject result = matchingfiles.First(); result.ShouldCacheLocally = true; // immutable return result; @@ -1306,7 +1316,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel //IClusterResidentObject dir = this.ProcessWorkDirectory(new DryadProcessIdentifier("Process.000.001"), true, job.Machine, job); var matchingfiles = dir.GetFilesAndFolders("DryadLinqProgram__*.xml").ToList(); if (matchingfiles.Count() != 1) - throw new CalypsoClusterException("Could not find query plan file; got " + matchingfiles.Count() + " possible matches"); + throw new 
ClusterException("Could not find query plan file; got " + matchingfiles.Count() + " possible matches"); IClusterResidentObject result = matchingfiles.First(); result.ShouldCacheLocally = true; // immutable return result; @@ -1395,10 +1405,67 @@ namespace Microsoft.Research.Calypso.JobObjectModel { } + /// + /// Enumerate all the clusters this user is subscribed to. + /// + /// The list of clusters this user is subscribed to. + public static IEnumerable EnumerateAzureDfsSubscribedClusters() + { + var store = new X509Store(); + store.Open(OpenFlags.ReadOnly); + var configDir = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), + "Windows Azure Powershell"); + var defaultFile = Path.Combine(configDir, "WindowsAzureProfile.xml"); + if (File.Exists(defaultFile)) + { + using (FileStream s = new FileStream(defaultFile, FileMode.Open, FileAccess.Read)) + { + XDocument doc = XDocument.Load(s); + XNamespace ns = doc.Root.GetDefaultNamespace(); + IEnumerable subs = doc.Descendants(ns + "AzureSubscriptionData"); + foreach (XElement sub in subs) + { + string thumbprint = sub.Descendants(ns + "ManagementCertificate").Single().Value; + string subId = sub.Descendants(ns + "SubscriptionId").Single().Value; + Guid subGuid = new Guid(subId); + + X509Certificate2 cert = store.Certificates.Cast().First(item => item.Thumbprint == thumbprint); + + HDInsightCertificateCredential sCred = new HDInsightCertificateCredential(subGuid, cert); + IHDInsightClient sClient = HDInsightClient.Connect(sCred); + var clusters = sClient.ListClusters(); + foreach (var cluster in clusters) + { + var account = cluster.DefaultStorageAccount; + var accountName = account.Name.Split('.').First(); + Console.WriteLine("Cluster " + cluster.Name + " uses account " + accountName + " with key " + account.Key); + + AzureDfsClusterConfiguration config = null; + try + { + config = new AzureDfsClusterConfiguration(); + config.AzureClient = new AzureDfsClient(accountName, account.Key, 
"dryad-jobs"); + config.Name = cluster.Name; + } + catch (Exception ex) + { + Console.WriteLine("Exception while reconstructing cluster " + cluster.Name + ": " + ex); + } + + if (config != null) + yield return config; + } + } + } + } + } + /// /// Azure account name. /// - public string AccountName { get; set; } + public + string AccountName { get; set; } /// /// Azure account key. /// @@ -1567,7 +1634,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel IClusterResidentObject dir = this.ProcessWorkDirectory(job.ManagerProcessGuid, true, job.Machine, job); // immutable var matchingfiles = dir.GetFilesAndFolders("DryadLinqProgram__*.xml").ToList(); if (matchingfiles.Count() != 1) - throw new CalypsoClusterException("Could not find query plan file; got " + matchingfiles.Count() + " possible matches"); + throw new ClusterException("Could not find query plan file; got " + matchingfiles.Count() + " possible matches"); IClusterResidentObject result = matchingfiles.First(); (result as AzureDfsFile).IsDfsStream = true; result.ShouldCacheLocally = true; // immutable diff --git a/JobBrowser/JOM/ClusterStatus.cs b/JobBrowser/JOM/ClusterStatus.cs index 57c11a6..a7ff380 100644 --- a/JobBrowser/JOM/ClusterStatus.cs +++ b/JobBrowser/JOM/ClusterStatus.cs @@ -23,11 +23,10 @@ using System; using System.Collections.Generic; using System.IO; using System.Linq; -using System.Text; -using System.Threading.Tasks; -using Microsoft.Research.Calypso.Tools; +using Microsoft.Research.Peloponnese.Storage; +using Microsoft.Research.Tools; -namespace Microsoft.Research.Calypso.JobObjectModel +namespace Microsoft.Research.JobObjectModel { /// /// Dynamic information of all the jobs and machines in a cluster. @@ -119,22 +118,20 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// The cached of tasks on the cluster. /// /// Virtual cluster selected; defined only for Scope clusters. - /// Delegate used to report errors. - /// Used to report progress. 
- public IEnumerable GetClusterJobList(string virtualCluster, StatusReporter reporter, Action reportProgress) + /// Communication manager. + public IEnumerable GetClusterJobList(string virtualCluster, CommManager manager) { - this.RecomputeClusterJobList(virtualCluster, reporter, reportProgress); + this.RecomputeClusterJobList(virtualCluster, manager); return this.clusterJobs.Values.ToList(); } /// /// Force the recomputation of the cluster job list. /// - /// Delegate used to report errors. /// Virtual cluster to use (defined only for some cluster types). - /// Used to report progress. + /// Communication manager. // ReSharper disable once UnusedParameter.Global - protected abstract void RecomputeClusterJobList(string virtualCluster, StatusReporter reporter, Action reportProgress); + protected abstract void RecomputeClusterJobList(string virtualCluster, CommManager manager); /// /// Discover the (unique) dryadlinq job corresponding to a cluster job. @@ -160,12 +157,11 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// /// Job to discover. /// The cluster job, or null if not found. - /// Delegate used to report errors. - /// Used to report progress. - public virtual ClusterJobInformation DiscoverClusterJob(DryadLinqJobSummary job, StatusReporter reporter, Action reportProgress) + /// Communication manager. + public virtual ClusterJobInformation DiscoverClusterJob(DryadLinqJobSummary job, CommManager manager) { if (this.clusterJobs == null) - this.RecomputeClusterJobList(job.VirtualCluster, reporter, reportProgress); + this.RecomputeClusterJobList(job.VirtualCluster, manager); return this.clusterJobs[job.ClusterJobId]; } @@ -173,13 +169,12 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// Refresh the job summary status. /// /// Summary to refresh. - /// Delegate used to report errors. - /// Used to report progress. 
- public virtual void RefreshStatus(DryadLinqJobSummary summary, StatusReporter reporter, Action reportProgress) + /// Communication manager. + public virtual void RefreshStatus(DryadLinqJobSummary summary, CommManager manager) { // refresh the whole list - this.RecomputeClusterJobList(summary.VirtualCluster, reporter, reportProgress); - ClusterJobInformation info = this.DiscoverClusterJob(summary, reporter, reportProgress); + this.RecomputeClusterJobList(summary.VirtualCluster, manager); + ClusterJobInformation info = this.DiscoverClusterJob(summary, manager); if (info == null) { summary.Status = ClusterJobInformation.ClusterJobStatus.Unknown; @@ -226,10 +221,9 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// /// Recompute the list of jobs on the cluster and add them to the clusterJobs field. /// - /// Delegate used to report errors. /// Unused. - /// Used to report progress. - protected override void RecomputeClusterJobList(string virtualCluster, StatusReporter reporter, Action reportProgress) + /// Communication manager. 
+ protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager) { this.clusterJobs = new Dictionary(); if (string.IsNullOrEmpty(CachedClusterResidentObject.CacheDirectory)) @@ -242,32 +236,32 @@ namespace Microsoft.Research.Calypso.JobObjectModel string[] files = Directory.GetFiles(joblist, "*.xml"); foreach (var file in files) { + manager.Token.ThrowIfCancellationRequested(); DryadLinqJobSummary job = Utilities.LoadXml(file); string cjid = job.Cluster + "-" + job.ClusterJobId; // there may be two jobs with same id from different clusters ClusterJobInformation ci = new ClusterJobInformation(this.Config.Name, job.Cluster, cjid, job.Name, job.User, job.Date, job.EndTime - job.Date, job.Status); ci.SetAssociatedSummary(job); if (this.clusterJobs.ContainsKey(cjid)) { - reporter("Duplicate job id, cannot insert in cache " + job.AsIdentifyingString(), StatusKind.Error); + manager.Status("Duplicate job id, cannot insert in cache " + job.AsIdentifyingString(), StatusKind.Error); continue; } this.clusterJobs.Add(cjid, ci); } - reportProgress(100); + manager.Progress(100); } /// /// Refresh the job summary status. /// /// Summary to refresh. - /// Delegate used to report errors. - /// Used to report progres. - public override void RefreshStatus(DryadLinqJobSummary job, StatusReporter reporter, Action reportProgress) + /// Communication manager. 
+ public override void RefreshStatus(DryadLinqJobSummary job, CommManager manager) { ClusterConfiguration actual = (this.Config as CacheClusterConfiguration).ActualConfig(job); ClusterStatus actualStatus = actual.CreateClusterStatus(); - actualStatus.RefreshStatus(job, reporter, reportProgress); - ClusterJobInformation info = actualStatus.DiscoverClusterJob(job, reporter, reportProgress); + actualStatus.RefreshStatus(job, manager); + ClusterJobInformation info = actualStatus.DiscoverClusterJob(job, manager); if (info == null) { job.Status = ClusterJobInformation.ClusterJobStatus.Unknown; @@ -303,13 +297,12 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// /// Cluster job. /// Throws an exception. - /// Delegate used to report errors. - /// Used to report progress. - public override ClusterJobInformation DiscoverClusterJob(DryadLinqJobSummary job, StatusReporter reporter, Action reportProgress) + /// Communication manager. + public override ClusterJobInformation DiscoverClusterJob(DryadLinqJobSummary job, CommManager manager) { ClusterConfiguration actual = (this.Config as CacheClusterConfiguration).ActualConfig(job); ClusterStatus actualStatus = actual.CreateClusterStatus(); - return actualStatus.DiscoverClusterJob(job, reporter, reportProgress); + return actualStatus.DiscoverClusterJob(job, manager); } /// @@ -358,10 +351,9 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// /// Force the recomputation of the cluster job list. /// - /// Delegate used to report errors. /// Virtual cluster to use (defined only for some cluster types). - /// Used to report progress. - protected override void RecomputeClusterJobList(string virtualCluster, StatusReporter reporter, Action reportProgress) + /// Communication manager. 
+ protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager) { this.clusterJobs = new Dictionary(); if (!Directory.Exists(this.config.JobsFolder)) @@ -371,6 +363,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel int done = 0; foreach (var job in subfolders) { + manager.Token.ThrowIfCancellationRequested(); string jobId = Path.GetFileName(job); ClusterJobInformation info = this.GetJobInfo(job, jobId); if (info != null) @@ -378,9 +371,9 @@ namespace Microsoft.Research.Calypso.JobObjectModel // ReSharper disable once AssignNullToNotNullAttribute this.clusterJobs.Add(jobId, info); } - reportProgress(done++ *100/subfolders.Length); + manager.Progress(done++ *100/subfolders.Length); } - reportProgress(100); + manager.Progress(100); } /// @@ -478,10 +471,9 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// /// Force the recomputation of the cluster job list. /// - /// Delegate used to report errors. /// Virtual cluster to use (defined only for some cluster types). - /// Used to report progress. - protected override void RecomputeClusterJobList(string virtualCluster, StatusReporter reporter, Action reportProgress) + /// Communication manager. 
+ protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager) { this.clusterJobs = new Dictionary(); var jobs = this.config.AzureClient.EnumerateDirectory("").ToList(); @@ -489,15 +481,16 @@ namespace Microsoft.Research.Calypso.JobObjectModel int done = 0; foreach (var job in jobs) { + manager.Token.ThrowIfCancellationRequested(); ClusterJobInformation info = this.GetJobInfo(job); if (info != null) { // ReSharper disable once AssignNullToNotNullAttribute this.clusterJobs.Add(job, info); } - reportProgress(100*done++/jobs.Count); + manager.Progress(100*done++/jobs.Count); } - reportProgress(100); + manager.Progress(100); } /// @@ -547,23 +540,12 @@ namespace Microsoft.Research.Calypso.JobObjectModel DateTime lastHeartBeat = DateTime.MinValue; ClusterJobInformation.ClusterJobStatus status = ClusterJobInformation.ClusterJobStatus.Unknown; bool found = false; + string jobName = jobRootFolder; var jobsFolders = this.config.AzureClient.EnumerateDirectory(jobRootFolder).ToList(); foreach (var file in jobsFolders) { - if (file.Contains("DryadLinqProgram__")) - { - var blob = this.config.AzureClient.Container.GetBlockBlobReference(file); - blob.FetchAttributes(); - var props = blob.Properties; - if (props.LastModified.HasValue) - { - date = props.LastModified.Value.DateTime; - date = date.ToLocalTime(); - } - found = true; - } - else if (file.EndsWith("heartbeat")) + if (file.EndsWith("heartbeat")) { var blob = this.config.AzureClient.Container.GetPageBlobReference(file); blob.FetchAttributes(); @@ -582,6 +564,9 @@ namespace Microsoft.Research.Calypso.JobObjectModel case "running": status = ClusterJobInformation.ClusterJobStatus.Running; break; + case "killed": + status = ClusterJobInformation.ClusterJobStatus.Cancelled; + break; default: Console.WriteLine("Unknown status " + st); break; @@ -599,6 +584,31 @@ namespace Microsoft.Research.Calypso.JobObjectModel status = ClusterJobInformation.ClusterJobStatus.Failed; } } + if 
(props.ContainsKey("jobname")) + { + jobName = props["jobname"]; + } + if (props.ContainsKey("starttime")) + { + var t = props["starttime"]; + if (DateTime.TryParse(t, out date)) + date = date.ToLocalTime(); + } + + found = true; + } + else if (file.Contains("DryadLinqProgram__") && + // newer heartbeats contain the date + date != DateTime.MinValue) + { + var blob = this.config.AzureClient.Container.GetBlockBlobReference(file); + blob.FetchAttributes(); + var props = blob.Properties; + if (props.LastModified.HasValue) + { + date = props.LastModified.Value.DateTime; + date = date.ToLocalTime(); + } } } @@ -607,7 +617,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel TimeSpan running = TimeSpan.Zero; if (date != DateTime.MinValue && lastHeartBeat != DateTime.MinValue) running = lastHeartBeat - date; - var info = new ClusterJobInformation(this.config.Name, "", jobRootFolder, jobRootFolder, Environment.UserName, date, running, status); + var info = new ClusterJobInformation(this.config.Name, "", jobRootFolder, jobName, Environment.UserName, date, running, status); return info; } @@ -615,9 +625,8 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// Refresh the job summary status. /// /// Summary to refresh. - /// Delegate used to report errors. - /// Used to report progress. - public override void RefreshStatus(DryadLinqJobSummary summary, StatusReporter reporter, Action reportProgress) + /// Communication manager. + public override void RefreshStatus(DryadLinqJobSummary summary, CommManager manager) { // refresh the whole list ClusterJobInformation info = this.GetJobInfo(summary.JobID); @@ -636,7 +645,8 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// True if the cancellation succeeded. 
public override bool CancelJob(DryadLinqJobSummary job) { - throw new InvalidOperationException(); + AzureUtils.KillJob(this.config.AccountName, this.config.AccountKey, this.config.Container, job.ClusterJobId); + return true; } } } diff --git a/JobBrowser/JOM/JobObjectModel.csproj b/JobBrowser/JOM/JobObjectModel.csproj index b7c7852..cabcc93 100644 --- a/JobBrowser/JOM/JobObjectModel.csproj +++ b/JobBrowser/JOM/JobObjectModel.csproj @@ -1,6 +1,6 @@ - + - + Debug AnyCPU @@ -10,7 +10,7 @@ Library Properties DistributedDataCollection - Microsoft.Research.DryadLinq.JobBrowser.DistributedDataCollection + DistributedDataCollection v4.5 512 @@ -37,28 +37,6 @@ true - - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - bin\Debug\Microsoft.Research.DryadLinq.JobBrowser.DistributedDataCollection.xml - AllRules.ruleset - false - - - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - AllRules.ruleset - false - true bin\x64\Debug\ @@ -91,43 +69,59 @@ False ..\packages\Microsoft.Data.Services.Client.5.6.1\lib\net40\Microsoft.Data.Services.Client.dll - + + False ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.Hadoop.Client.dll - - False - ..\Dependencies\Microsoft.Hpc.Scheduler.dll - - - False - ..\Dependencies\Microsoft.Hpc.Scheduler.Properties.dll - False - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll False - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + + + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.dll + + + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.dll + + + 
..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll + + + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.dll + + + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll - + + ..\packages\Microsoft.WindowsAzure.Management.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.dll + + + False ..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll - + + False ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.dll - + + False ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll + + ..\packages\Microsoft.WindowsAzure.Management.Storage.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll + False ..\packages\WindowsAzure.Storage.3.1.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll - - False + ..\packages\Newtonsoft.Json.6.0.2\lib\net45\Newtonsoft.Json.dll @@ -135,6 +129,15 @@ 3.5 + + + + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Extensions.dll + + + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Primitives.dll + + False ..\packages\System.Spatial.5.6.1\lib\net40\System.Spatial.dll @@ -153,7 +156,7 @@ - + @@ -186,15 +189,19 @@ - + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
- - + + - + - + \ No newline at end of file diff --git a/JobBrowser/JOM/Properties/AssemblyInfo.cs b/JobBrowser/JOM/Properties/AssemblyInfo.cs index 4b3319d..f51b2bc 100644 --- a/JobBrowser/JOM/Properties/AssemblyInfo.cs +++ b/JobBrowser/JOM/Properties/AssemblyInfo.cs @@ -28,9 +28,9 @@ using System.Runtime.InteropServices; [assembly: AssemblyTitle("DistributedDataCollection")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("Microsoft Corporation")] +[assembly: AssemblyCompany("")] [assembly: AssemblyProduct("DistributedDataCollection")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. All rights reserved.")] +[assembly: AssemblyCopyright("Copyright © 2009")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] @@ -52,5 +52,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.1.0.0")] -[assembly: AssemblyFileVersion("0.1.0.0")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/JobBrowser/JOM/app.config b/JobBrowser/JOM/app.config index 932f99f..5e4a799 100644 --- a/JobBrowser/JOM/app.config +++ b/JobBrowser/JOM/app.config @@ -2,10 +2,6 @@ - - - - @@ -22,6 +18,10 @@ + + + + \ No newline at end of file diff --git a/JobBrowser/JOM/clusteraccess.cs b/JobBrowser/JOM/clusteraccess.cs index 594d56f..3cf0ec9 100644 --- a/JobBrowser/JOM/clusteraccess.cs +++ b/JobBrowser/JOM/clusteraccess.cs @@ -20,15 +20,15 @@ limitations under the License. 
*/ using System.Text.RegularExpressions; -using Microsoft.Research.Calypso.Tools; using System; using System.Collections.Generic; using System.IO; using System.Diagnostics; using Microsoft.Research.Peloponnese.Storage; +using Microsoft.Research.Tools; using Microsoft.WindowsAzure.Storage.Blob; -namespace Microsoft.Research.Calypso.JobObjectModel +namespace Microsoft.Research.JobObjectModel { /// /// A cluster-resident object is a file or a folder. @@ -158,7 +158,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel if (file.Job == null) return; if (string.IsNullOrEmpty(file.LocalCachePath)) - throw new CalypsoClusterException("Missing expected LocalCachePath"); + throw new ClusterException("Missing expected LocalCachePath"); CachedClusterResidentObject.RecordCachedFile(file.Job, file.LocalCachePath); } @@ -375,7 +375,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel { // cache it if (this.RepresentsAFolder) - throw new CalypsoClusterException("Cannot cache folders"); + throw new ClusterException("Cannot cache folders"); StreamWriter writer = this.CreateTempStream(); return new FileSharedStreamReader(this.Pathname.ToString(), writer, this.OnClose); @@ -504,7 +504,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel get { if (this.RepresentsAFolder) - throw new CalypsoClusterException("Cannot get size of a folder"); + throw new ClusterException("Cannot get size of a folder"); if (File.Exists(this.LocalCachePath)) { FileInfo info = new FileInfo(this.LocalCachePath); @@ -732,7 +732,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// Throws an exception. public IEnumerable GetFilesAndFolders(string match) { - throw new CalypsoClusterException("Object is not a folder"); + throw new ClusterException("Object is not a folder"); } private long size; @@ -787,7 +787,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// Throws an exception. 
public IClusterResidentObject GetFile(string filename) { - throw new CalypsoClusterException("Object is not a folder"); + throw new ClusterException("Object is not a folder"); } /// @@ -797,7 +797,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// Throws an exception. public IClusterResidentObject GetFolder(string foldername) { - throw new CalypsoClusterException("Object is not a folder"); + throw new ClusterException("Object is not a folder"); } } @@ -1011,13 +1011,24 @@ namespace Microsoft.Research.Calypso.JobObjectModel this.client.ContainerName, this.path); } - StreamReader reader = new StreamReader(stream, System.Text.Encoding.UTF8, false, 1024 * 1024); + + long size = this.Size; + int bufferSize = 1024*1024; + if (size >= 0) + { + bufferSize = (int)(size/10); + if (bufferSize < 1024*1024) + bufferSize = 1024*1024; + if (bufferSize > 20*1024*1024) + bufferSize = 20*1024*1024; + } + StreamReader reader = new StreamReader(stream, System.Text.Encoding.UTF8, false, bufferSize); if (this.ShouldCacheLocally && this.LocalCachePath != null) { // cache it if (this.RepresentsAFolder) - throw new CalypsoClusterException("Cannot cache folders"); + throw new ClusterException("Cannot cache folders"); StreamWriter writer = this.CreateTempStream(); return new SharedStreamReader(reader, writer, this.OnClose); } diff --git a/JobBrowser/JOM/cosmos.cs b/JobBrowser/JOM/dryadlog.cs similarity index 97% rename from JobBrowser/JOM/cosmos.cs rename to JobBrowser/JOM/dryadlog.cs index 11bc3cc..2f0453f 100644 --- a/JobBrowser/JOM/cosmos.cs +++ b/JobBrowser/JOM/dryadlog.cs @@ -19,17 +19,17 @@ limitations under the License. */ -namespace Microsoft.Research.Calypso.JobObjectModel +namespace Microsoft.Research.JobObjectModel { using System.Text.RegularExpressions; using System; using System.Diagnostics; /// - /// Information about a standard Cosmos log entry. + /// Information about a standard Dryad log entry. 
/// [Serializable] - public class CosmosLogEntry : IParse + public class DryadLogEntry : IParse { /// /// Message severity. @@ -98,7 +98,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// /// Allocate an empty log entry /// - public CosmosLogEntry() + public DryadLogEntry() { this.Malformed = true; } @@ -107,7 +107,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// Allocate a log entry from a given string. /// /// String to initialize the log entry. - public CosmosLogEntry(string line) + public DryadLogEntry(string line) { this.Malformed = true; // ReSharper disable once DoNotCallOverridableMethodsInConstructor @@ -167,7 +167,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// An extended log entry is like a cosmos log entry, but it has a prefix: GUID,Machine /// [Serializable] - public class ExtendedLogEntry : CosmosLogEntry + public class ExtendedLogEntry : DryadLogEntry { static Regex loglineregex = new Regex(@"([-0-9A-F]+), # 1 guid, inserted by reader diff --git a/JobBrowser/JOM/jobinfo.cs b/JobBrowser/JOM/jobinfo.cs index 9e47d1d..231ab2a 100644 --- a/JobBrowser/JOM/jobinfo.cs +++ b/JobBrowser/JOM/jobinfo.cs @@ -27,24 +27,23 @@ using System.Collections.Generic; using System.Globalization; using System.IO; using System.Linq; -using System.Text; using System.Text.RegularExpressions; using System.Xml.Linq; -using Microsoft.Research.Calypso.Tools; using System.Diagnostics; +using Microsoft.Research.Tools; -namespace Microsoft.Research.Calypso.JobObjectModel +namespace Microsoft.Research.JobObjectModel { /// - /// Exception throw by Calypso when it cannot understand the structure of a Dryad/DryadLINQ job. + /// Exception thrown when we cannot understand the structure of a Dryad/DryadLINQ job. /// - public class CalypsoDryadException : Exception + public class DryadException : Exception { /// - /// Create a new CalypsoDryadException. + /// Create a new DryadException. /// /// Message conveyed by the exception. 
- public CalypsoDryadException(string message) : base(message) { } + public DryadException(string message) : base(message) { } } /// @@ -195,7 +194,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel this.SuccessfulVertices++; break; default: - throw new CalypsoDryadException("Unexpected vertex state " + vertex.State); + throw new DryadException("Unexpected vertex state " + vertex.State); } } this.TotalInitiatedVertices -= this.AbandonedVertices; @@ -423,22 +422,21 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// Cluster configuration. /// Summary description of the job. /// The Dryad job description, or null. - /// Delegate used to report errors. /// If true, fill all the information, otherwise the user will have to call FillInformation on the result later. - /// Delegate used to report progress. - public static DryadLinqJobInfo CreateDryadLinqJobInfo(ClusterConfiguration cf, DryadLinqJobSummary summary, bool fill, StatusReporter reporter, Action updateProgress) + /// Communication manager. 
+ public static DryadLinqJobInfo CreateDryadLinqJobInfo(ClusterConfiguration cf, DryadLinqJobSummary summary, bool fill, CommManager manager) { try { DryadLinqJobInfo job = new DryadLinqJobInfo(cf, summary); if (fill) - job.CollectEssentialInformation(reporter, updateProgress); + job.CollectEssentialInformation(manager); return job; } catch (Exception e) { Trace.TraceInformation(e.ToString()); - reporter("Could not collect job information for " + summary.Name + ": " + e.Message, StatusKind.Error); + manager.Status("Could not collect job information for " + summary.Name + ": " + e.Message, StatusKind.Error); return null; } } @@ -506,7 +504,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel if (this.stdoutpath == null) { - throw new CalypsoClusterException("Could not locate JM standard output file in folder " + jmdir); + throw new ClusterException("Could not locate JM standard output file in folder " + jmdir); } } } @@ -515,9 +513,8 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// /// Refresh the job status. /// - /// Delegate used to report errors. - /// Used to report progress. - public void RefreshJobStatus(StatusReporter reporter, Action updateProgress) + /// Communication manager. + public void RefreshJobStatus(CommManager manager) { // skip if job is finished if (this.Summary.Status == ClusterJobInformation.ClusterJobStatus.Failed || @@ -526,18 +523,17 @@ namespace Microsoft.Research.Calypso.JobObjectModel return; ClusterStatus status = this.ClusterConfiguration.CreateClusterStatus(); - status.RefreshStatus(this.Summary, reporter, updateProgress); + status.RefreshStatus(this.Summary, manager); } /// /// Fill the job info by parsing the stdout.txt. - /// Delegate used to report errors. - /// True if it succeeds, false otherwise. - /// Delegate used to report progress. + /// The updated job. + /// Communication manager. 
/// - public bool CollectEssentialInformation(StatusReporter statusReporter, Action updateProgress) + public bool CollectEssentialInformation(CommManager manager) { - this.RefreshJobStatus(statusReporter, updateProgress); + this.RefreshJobStatus(manager); if (this.ManagerVertex == null) { this.ManagerVertex = new ExecutedVertexInstance(this, -1, 0, "JobManager", "", this.Summary.Date); @@ -562,13 +558,13 @@ namespace Microsoft.Research.Calypso.JobObjectModel if (this.stdoutpath == null) return false; - bool success = this.ParseStdout(this.stdoutpath, statusReporter, updateProgress); - updateProgress(100); + bool success = this.ParseStdout(this.stdoutpath, manager); + manager.Progress(100); if (!success) return false; this.JobInfoCannotBeCollected = false; - statusReporter("Stdout parsed", StatusKind.OK); + manager.Status("Stdout parsed", StatusKind.OK); this.LastUpdatetime = DateTime.Now; if (this.Summary.Status == ClusterJobInformation.ClusterJobStatus.Running) @@ -600,13 +596,13 @@ namespace Microsoft.Research.Calypso.JobObjectModel { Match m = numberre.Match(vertexlist); if (!m.Success) - throw new CalypsoDryadException("Could not find vertex number in " + vertexlist); + throw new DryadException("Could not find vertex number in " + vertexlist); string number = m.Groups[1].Value; // now scan a balanced number of parantheses string rest = m.Groups[2].Value; if (rest[0] != '(') - throw new CalypsoDryadException("Expecting open parens after vertex number"); + throw new DryadException("Expecting open parens after vertex number"); int opened = 0; int i; for (i = 0; i < rest.Length; i++) @@ -624,7 +620,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel } } if (opened != 0 || i <= 2) - throw new CalypsoDryadException("did not find matched parantheses in vertex name in " + vertexlist + ", can't parse"); + throw new DryadException("did not find matched parantheses in vertex name in " + vertexlist + ", can't parse"); string name = rest.Substring(1, i - 2); // skip 
first and last paranthesis yield return new Tuple(name, int.Parse(number)); vertexlist = rest.Substring(i); @@ -761,7 +757,9 @@ namespace Microsoft.Research.Calypso.JobObjectModel case "running": { string process; - kvp.TryGetValue("id", out process); // "process" is also good + kvp.TryGetValue("id", out process); + if (process == null) + kvp.TryGetValue("process", out process); string machine = kvp["computer"]; ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version); this.jobVertices.Remap(vi, process); @@ -776,6 +774,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel vi.SetState(ExecutedVertexInstance.VertexState.Successful); vi.End = timeStamp; vi.ExitCode = ""; + this.UsefulCPUTime += vi.RunningTime; break; } case "failed": @@ -784,7 +783,11 @@ namespace Microsoft.Research.Calypso.JobObjectModel if (vi.State != ExecutedVertexInstance.VertexState.Started) vi.SetState(ExecutedVertexInstance.VertexState.Cancelled); else + { vi.SetState(ExecutedVertexInstance.VertexState.Failed); + if (vi.RunningTime > TimeSpan.Zero) + this.WastedCPUTime += vi.RunningTime; + } if (kvp.ContainsKey("errorstring")) vi.AddErrorString(kvp["errorstring"]); string exitcode; @@ -797,32 +800,15 @@ namespace Microsoft.Research.Calypso.JobObjectModel } else if (kvp.ContainsKey("outputChannel")) { - string chan = kvp["outputChannel"]; - int channelNo = int.Parse(chan); ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version); - - if (!kvp.ContainsKey("errorstatus")) - { - } - else - { - if (kvp.ContainsKey("errorstring")) - vi.AddErrorString(kvp["errorstring"]); - } + if (kvp.ContainsKey("errorstring")) + vi.AddErrorString(kvp["errorstring"]); } else if (kvp.ContainsKey("inputChannel")) { - string chan = kvp["inputChannel"]; - int channelNo = int.Parse(chan); ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version); - - if (!kvp.ContainsKey("errorstatus")) - { - } - else - { + if (kvp.ContainsKey("errorstring")) 
vi.AddErrorString(kvp["errorstring"]); - } } else if (kvp.ContainsKey("io")) { @@ -834,6 +820,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel if (vi.InputChannels == null) vi.InputChannels = new Dictionary(); + for (int i = 0; i < numberOfInputs; i++) { string uri; @@ -870,6 +857,24 @@ namespace Microsoft.Research.Calypso.JobObjectModel vi.DataRead = totalRead; vi.DataWritten = totalWritten; + if (vi.InputChannels != null) + { + foreach (int ch in vi.InputChannels.Keys) + { + long bytes = TryGetNumeric(kvp, "rb." + ch); + vi.InputChannels[ch].Size = bytes; + } + } + + if (vi.OutputChannels != null) + { + foreach (int ch in vi.OutputChannels.Keys) + { + long bytes = TryGetNumeric(kvp, "wb." + ch); + vi.OutputChannels[ch].Size = bytes; + } + } + this.TotalDataRead += totalRead; this.LocalReadData += localRead; this.CrossPodDataRead += tempReadCrossRack; @@ -878,17 +883,26 @@ namespace Microsoft.Research.Calypso.JobObjectModel else if (kvp["io"] == "running") { ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version); - - foreach (int ch in vi.InputChannels.Keys) + + if (vi.InputChannels != null) { - long bytes = TryGetNumeric(kvp, "rb." + ch); - vi.InputChannels[ch].Size = bytes; + foreach (int ch in vi.InputChannels.Keys) + { + long bytes = TryGetNumeric(kvp, "rb." + ch); + vi.InputChannels[ch].Size = bytes; + + bytes = TryGetNumeric(kvp, "tb." + ch); + vi.InputChannels[ch].TotalSize = bytes; + } } - foreach (int ch in vi.OutputChannels.Keys) + if (vi.InputChannels != null) { - long bytes = TryGetNumeric(kvp, "wb." + ch); - vi.OutputChannels[ch].Size = bytes; + foreach (int ch in vi.OutputChannels.Keys) + { + long bytes = TryGetNumeric(kvp, "wb." 
+ ch); + vi.OutputChannels[ch].Size = bytes; + } } long totalRead = TryGetNumeric(kvp, "totalRead"); @@ -1062,7 +1076,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel else if (m.Groups[1].Value == "ices") onevertex = false; else - throw new CalypsoDryadException("Can't figure out if one or many vertices"); + throw new DryadException("Can't figure out if one or many vertices"); IEnumerable> vertexList = DryadLinqJobInfo.ParseVertices(vertices); @@ -1084,7 +1098,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel } if (vertexcount > 1 && onevertex) - throw new CalypsoDryadException("Expected one vertex, found " + vertexcount); + throw new DryadException("Expected one vertex, found " + vertexcount); } else { @@ -1225,7 +1239,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel if (vi.State == ExecutedVertexInstance.VertexState.Started) { Console.WriteLine("Timing information while vertex is still running " + vi); - //throw new CalypsoClusterException("Timing information for vertex still running: " + vi); + //throw new ClusterException("Timing information for vertex still running: " + vi); } DateTime last = vi.SetTiming(createtime, m.Groups[5].Value, m.Groups[6].Value, m.Groups[7].Value, m.Groups[8].Value, m.Groups[9].Value); if (last > this.lastTimestampSeen) @@ -1245,7 +1259,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel } } else - throw new CalypsoDryadException("Unmatched timing information line " + line); + throw new DryadException("Unmatched timing information line " + line); } else if (line.Contains("Process has failed")) { @@ -1410,7 +1424,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel return retval; } - private ISharedStreamReader cachedStdoutReader = null; + private ISharedStreamReader cachedStdoutReader; /// /// Remember how many lines were parsed, and skip them on a second invocation. @@ -1420,10 +1434,9 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// Parse the stdout.txt file from the job manager. 
/// /// File to parse. - /// Delegate used to report errors. - /// Delegate used to report progress. + /// Communication manager. /// True if the parsing succeeds. - private bool ParseStdout(IClusterResidentObject file, StatusReporter statusReporter, Action updateProgress) + private bool ParseStdout(IClusterResidentObject file, CommManager manager) { int currentLine = 0; if (this.stdoutLinesParsed == 0) @@ -1440,15 +1453,16 @@ namespace Microsoft.Research.Calypso.JobObjectModel string message = "Scanning JM stdout " + file; if (filesize >= 0) message += string.Format("({0:N0} bytes)", filesize); - statusReporter(message, StatusKind.LongOp); + manager.Status(message, StatusKind.LongOp); if (this.cachedStdoutReader == null) this.cachedStdoutReader = file.GetStream(); if (this.cachedStdoutReader.Exception != null) { - statusReporter("Exception while opening stdout " + this.cachedStdoutReader.Exception.Message, StatusKind.Error); + manager.Status("Exception while opening stdout " + this.cachedStdoutReader.Exception.Message, StatusKind.Error); return false; } + while (!this.cachedStdoutReader.EndOfStream) { string line = this.cachedStdoutReader.ReadLine(); @@ -1457,6 +1471,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel { while (true) { + manager.Token.ThrowIfCancellationRequested(); int startLine = currentLine; bool completeLine = true; try @@ -1465,7 +1480,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel } catch (Exception ex) { - statusReporter(string.Format("Line {0}: Exception {1}", currentLine, ex.Message), StatusKind.Error); + manager.Status(string.Format("Line {0}: Exception {1}", currentLine, ex.Message), StatusKind.Error); Console.WriteLine("Line {0}: Exception {1}", currentLine, ex); } if (!completeLine) @@ -1485,7 +1500,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel currentLine++; if (currentLine % 100 == 0 && filesize > 0) { - updateProgress(Math.Min(100, (int)(100 * readbytes / filesize))); + manager.Progress(Math.Min(100, 
(int)(100 * readbytes / filesize))); } } @@ -1500,13 +1515,16 @@ namespace Microsoft.Research.Calypso.JobObjectModel // we are done with this stream if (this.ManagerVertex.State == ExecutedVertexInstance.VertexState.Failed || this.ManagerVertex.State == ExecutedVertexInstance.VertexState.Successful) + { this.cachedStdoutReader.Close(); + this.cachedStdoutReader = null; // will force reopening if refreshed + } } return true; } catch (Exception e) { - statusReporter("Exception while reading stdout " + e.Message, StatusKind.Error); + manager.Status("Exception while reading stdout " + e.Message, StatusKind.Error); Trace.TraceInformation(e.ToString()); return false; } @@ -1592,7 +1610,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel string line = sr.ReadLine(); if (!line.Contains("DryadProfiler")) continue; - CosmosLogEntry le = new CosmosLogEntry(line); + DryadLogEntry le = new DryadLogEntry(line); if (le.Subsystem != "DryadProfiler") continue; if (!le.Message.EndsWith("channel status")) continue; @@ -1905,7 +1923,8 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// /// Parse the query plan: cluster-specific. /// - protected abstract void ParseQueryPlan(); + /// Communication manager. + protected abstract void ParseQueryPlan(CommManager manager); int fictitiousStages; @@ -2009,11 +2028,11 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// Factory: create the plan for a given job. /// /// Job to create plan for. - /// Delegate used to report errors. /// The plan or null. - public static DryadJobStaticPlan CreatePlan(DryadLinqJobInfo dryadLinqJobInfo, StatusReporter reporter) + /// Communication manager. 
+ public static DryadJobStaticPlan CreatePlan(DryadLinqJobInfo dryadLinqJobInfo, CommManager manager) { - reporter("Trying to build static plan", StatusKind.LongOp); + manager.Status("Trying to build static plan", StatusKind.LongOp); ClusterConfiguration config = dryadLinqJobInfo.ClusterConfiguration; IClusterResidentObject file = config.JobQueryPlan(dryadLinqJobInfo.Summary); if (config is CacheClusterConfiguration) @@ -2025,12 +2044,12 @@ namespace Microsoft.Research.Calypso.JobObjectModel { retval = new DryadLinqJobStaticPlan(config, file.GetStream()); } - retval.ParseQueryPlan(); + retval.ParseQueryPlan(manager); return retval; } else { - reporter("Exception while looking for plan " + file.Exception.Message, StatusKind.Error); + manager.Status("Exception while looking for plan " + file.Exception.Message, StatusKind.Error); return null; } } @@ -2056,9 +2075,10 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// /// Parse an XML query plan and represent that information. /// - protected override void ParseQueryPlan() + /// Communicaton manager. 
+ protected override void ParseQueryPlan(CommManager manager) { - string planString = this.planStream.ReadToEnd(); + string planString = this.planStream.ReadToEnd(manager.Token); XDocument plan = XDocument.Parse(planString); // ReSharper disable PossibleNullReferenceException @@ -2104,7 +2124,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel info.Arity = Connection.ConnectionType.AllToAll; break; default: - throw new CalypsoDryadException("Don't know about connection of type " + connection); + throw new DryadException("Don't know about connection of type " + connection); } switch (cht) { @@ -2118,7 +2138,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel info.ChannelKind = Connection.ChannelType.Fifo; break; default: - throw new CalypsoDryadException("Don't know about channel of type " + cht); + throw new DryadException("Don't know about channel of type " + cht); } this.perNodeConnectionInfo.Add(stage.Id, info); } @@ -2247,14 +2267,15 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// /// Parse the Algebra file. /// - private void ParseAlgebra() + /// Communication manager. + private void ParseAlgebra(CommManager manager) { // TODO: this parser is not really complete, as I don't understand the semantics of all xml elements. Dictionary outToStage = new Dictionary(); // map an output to a stage name. Assume that ios have unique names. Dictionary> inputs = new Dictionary>(); // ... - string planString = this.planStream.ReadToEnd(); + string planString = this.planStream.ReadToEnd(manager.Token); XDocument plan = XDocument.Parse(planString); // ReSharper disable PossibleNullReferenceException XElement graph = plan.Root.Element("graph"); // graph node, children are stages @@ -2416,13 +2437,13 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// /// Parse the vertex definition file. 
/// - private void ParseVertexDef() + private void ParseVertexDef(CommManager manager) { if (this.vertexDef.Exception != null) return; // - string planString = this.vertexDef.ReadToEnd(); + string planString = this.vertexDef.ReadToEnd(manager.Token); XDocument vxDef = XDocument.Parse(planString); XElement vertices = vxDef.Root; @@ -2469,10 +2490,11 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// /// Parse the query plan for a Scope job. /// - protected override void ParseQueryPlan() + /// Communication manager. + protected override void ParseQueryPlan(CommManager manager) { - this.ParseAlgebra(); - this.ParseVertexDef(); + this.ParseAlgebra(manager); + this.ParseVertexDef(manager); } } @@ -2653,9 +2675,13 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// public string LocalPath { get; protected set; } /// - /// How big is the channel (0 if it cannot be determined, e.g. FIFO, -1 if the channel data cannot be retrieved). + /// The actual data read/written so far (0 if it cannot be determined, e.g. FIFO, -1 if the channel data cannot be retrieved). /// public long Size { get; set; } + /// + /// How much of the channel was + /// + public long TotalSize { get; set; } /// /// String representation of the endpoint. 
@@ -2664,7 +2690,10 @@ namespace Microsoft.Research.Calypso.JobObjectModel { string uritype = this.UriType; string localpath = this.LocalPath; - return string.Format("{0,4} {1,20:N0} {2}://{3}", this.Number, this.Size, uritype, localpath); + if (this.TotalSize == 0) + return string.Format("{0,4} {1,20:N0} {2}://{3}", this.Number, this.Size, uritype, localpath); + else + return string.Format("{0,4} {1,20:N0}/{2,20:N0} {3}://{4}", this.Number, this.Size, this.TotalSize, uritype, localpath); } /// @@ -2684,7 +2713,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel int sepindex = uri.IndexOf("://"); if (sepindex < 0) - throw new CalypsoDryadException("Channel URI " + uri + " does not contain separator ://"); + throw new DryadException("Channel URI " + uri + " does not contain separator ://"); this.UriType = uri.Substring(0, sepindex); // some HPC URIs use the compression scheme as an "option" (not really defined for file:// uris, but...) @@ -2737,10 +2766,10 @@ namespace Microsoft.Research.Calypso.JobObjectModel this.Number = number; int sepindex = uri.IndexOf("://"); if (sepindex < 0) - throw new CalypsoClusterException("Channel URI " + uri + " does not contain separator ://"); + throw new ClusterException("Channel URI " + uri + " does not contain separator ://"); this.UriType = uri.Substring(0, sepindex); this.LocalPath = uri.Substring(sepindex + 3); - this.Size = size; + this.TotalSize = size; } } @@ -2883,7 +2912,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel this.StdoutFile = job.ClusterConfiguration.ProcessStdoutFile(this.ProcessIdentifier, false, machine, job.Summary); this.SetState(VertexState.Started); if (approxStartTime == DateTime.MinValue) - throw new CalypsoDryadException("Unexpected small start time for vertex"); + throw new DryadException("Unexpected small start time for vertex"); this.LogDirectory = job.ClusterConfiguration.ProcessLogDirectory(this.ProcessIdentifier, false, machine, job.Summary); this.LogFilesPattern = 
job.ClusterConfiguration.VertexLogFilesPattern(false, job.Summary); this.UniqueID = uniqueId; @@ -3049,7 +3078,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel { Trace.TraceInformation("Vertex {0} which is not started is still running?", this.Name); return; - //throw new CalypsoClusterException("Vertex which is not started is still running?"); + //throw new ClusterException("Vertex which is not started is still running?"); } if (this.Start > when) // This can happen if the cluster clocks are not synchronized with the local machine clocks. @@ -3088,7 +3117,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel TimeSpan total = TimeSpan.FromSeconds(totSeconds); DateTime totalTime = creation + total; if (totSeconds < 0) - throw new CalypsoDryadException("Negative total time for vertex " + this.Name); + throw new DryadException("Negative total time for vertex " + this.Name); // if the vertex has no machine just ignore the times if (string.IsNullOrEmpty(this.Machine)) @@ -3209,9 +3238,8 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// If the channel is an output, prefix the path with this; this is null for inputs. /// If true, do not return anything (still useful to advance the stream reader). /// If true the channel sizes are not discovered; this is much faster, since no remote machines are queried for files. - /// Delegate used to report progress. - /// Delegate used to report errors. - private Dictionary DiscoverOriginalInfoChannels(ISharedStreamReader sr, string uriprefix, bool skip, bool fast, StatusReporter reporter, Action updateProgress) + /// Communication manager. 
+ private Dictionary DiscoverOriginalInfoChannels(ISharedStreamReader sr, string uriprefix, bool skip, bool fast, CommManager manager) { bool isInput = uriprefix == null; @@ -3231,21 +3259,18 @@ namespace Microsoft.Research.Calypso.JobObjectModel string channel = sr.ReadLine(); if (channel == null) { - if (updateProgress != null) - updateProgress(100); + manager.Progress(100); return null; } if (!skip) { - ChannelEndpointDescription desc = new ChannelEndpointDescription(isInput, i, channel, uriprefix, fast, reporter); + ChannelEndpointDescription desc = new ChannelEndpointDescription(isInput, i, channel, uriprefix, fast, manager.Status); channels.Add(i, desc); - if (updateProgress != null) - updateProgress(i * 100 / channelCount); + manager.Progress(i * 100 / channelCount); } } - if (updateProgress != null) - updateProgress(100); + manager.Progress(100); if (skip) return null; return channels; @@ -3258,9 +3283,8 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// If true discover the inputs. /// If true discover the outputs. /// If true do not discover the channel sizes (much faster). - /// Delegate used to report progress. - /// Delegate used to report errors. - public bool DiscoverOriginalInfoChannels(bool inputs, bool outputs, bool fast, StatusReporter reporter, Action progress) + /// Communication manager. 
+ public bool DiscoverOriginalInfoChannels(bool inputs, bool outputs, bool fast, CommManager manager) { string filename = string.Format("vertex-{0}-{1}-rerun-originalInfo.txt", this.Number, this.Version); bool success = true; @@ -3270,7 +3294,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel // skip discovery inputs = false; ISharedStreamReader sr = this.WorkDirectory.GetFile(filename).GetStream(); - var channels = this.DiscoverOriginalInfoChannels(sr, null, !inputs, fast, reporter, progress); + var channels = this.DiscoverOriginalInfoChannels(sr, null, !inputs, fast, manager); if (channels == null) { if (inputs) @@ -3281,7 +3305,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel if (this.OutputChannels != null) // skip discovery outputs = false; - channels = this.DiscoverOriginalInfoChannels(sr, this.WorkDirectory.ToString(), !outputs, fast, reporter, progress); + channels = this.DiscoverOriginalInfoChannels(sr, this.WorkDirectory.ToString(), !outputs, fast, manager); if (channels == null) { if (outputs) @@ -3300,28 +3324,27 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// If true discover the inputs. /// If true discover the outputs. /// If true do not discover the channel sizes (much faster). - /// Delegate used to report progress. - /// Delegate used to report errors. + /// Communication manager. 
// ReSharper disable UnusedParameter.Global - public bool DiscoverScopeChannels(bool inputs, bool outputs, bool fast, StatusReporter reporter, Action progress) + public bool DiscoverScopeChannels(bool inputs, bool outputs, bool fast, CommManager manager) // ReSharper restore UnusedParameter.Global { // find the xml file var files = this.WorkDirectory.GetFilesAndFolders("vcmdStart*.xml").ToList(); if (files.Count != 1) { - reporter("Cannot locate vcmdStart*.xml file", StatusKind.Error); + manager.Status("Cannot locate vcmdStart*.xml file", StatusKind.Error); return false; } ISharedStreamReader sr = files.First().GetStream(); if (sr.Exception != null) { - reporter("Error reading vcmdStart*.xml file" + sr.Exception.Message, StatusKind.Error); + manager.Status("Error reading vcmdStart*.xml file" + sr.Exception.Message, StatusKind.Error); return false; } // ReSharper disable PossibleNullReferenceException - XDocument plan = XDocument.Parse(sr.ReadToEnd()); + XDocument plan = XDocument.Parse(sr.ReadToEnd(manager.Token)); if (inputs && this.InputChannels == null) { var channels = new Dictionary(); @@ -3365,9 +3388,8 @@ namespace Microsoft.Research.Calypso.JobObjectModel /// If true discover the inputs. /// If true discover the outputs. /// If true do not discover the channel sizes (much faster). - /// Delegate used to report progress. - /// Delegate used to report errors. - public bool DiscoverChannels(bool inputs, bool outputs, bool fast, StatusReporter reporter, Action progress) + /// Communication manager. 
+ public bool DiscoverChannels(bool inputs, bool outputs, bool fast, CommManager manager) { // check if the result is already cached if ((this.InputChannels != null || !inputs) && @@ -3395,7 +3417,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel if (wd is UNCFile) { - result = this.DiscoverOriginalInfoChannels(inputs, outputs, fast, reporter, progress); + result = this.DiscoverOriginalInfoChannels(inputs, outputs, fast, manager); } else { @@ -3448,9 +3470,9 @@ namespace Microsoft.Research.Calypso.JobObjectModel internal void Update(string name, string guid) { if (this.State != VertexState.Cancelled && this.State != VertexState.Abandoned) - throw new CalypsoDryadException("Updating a non-cancelled/abandoned vertex"); + throw new DryadException("Updating a non-cancelled/abandoned vertex"); if (this.Name != name) - throw new CalypsoDryadException("Vertex changed name"); + throw new DryadException("Vertex changed name"); this.UniqueID = guid; this.SetState(VertexState.Created); // the stdoutfile is expected to change, so I don't invalidate the cache diff --git a/JobBrowser/JOM/packages.config b/JobBrowser/JOM/packages.config index 6cc3081..bd29d67 100644 --- a/JobBrowser/JOM/packages.config +++ b/JobBrowser/JOM/packages.config @@ -1,13 +1,21 @@ - + + + + - + + + + + + - + \ No newline at end of file diff --git a/JobBrowser/JOM/storage.cs b/JobBrowser/JOM/storage.cs index 8463362..c3ca06c 100644 --- a/JobBrowser/JOM/storage.cs +++ b/JobBrowser/JOM/storage.cs @@ -22,13 +22,13 @@ limitations under the License. 
#undef USE_DSC #undef USE_TIDYFS -using Microsoft.Research.Calypso.Tools; using System; using System.Collections.Generic; using System.IO; using System.Linq; +using Microsoft.Research.Tools; -namespace Microsoft.Research.Calypso.JobObjectModel +namespace Microsoft.Research.JobObjectModel { /// @@ -197,7 +197,7 @@ namespace Microsoft.Research.Calypso.JobObjectModel { var sourceInputs = plan.GetStageConnections(source, true).ToList(); if (sourceInputs.Count() != 1) - throw new CalypsoDryadException("Unexpected number of inputs for stage " + source.Name); + throw new DryadException("Unexpected number of inputs for stage " + source.Name); source = sourceInputs.First().From; } diff --git a/JobBrowser/JobBrowser.sln b/JobBrowser/JobBrowser.sln index 6134b2b..4f11530 100644 --- a/JobBrowser/JobBrowser.sln +++ b/JobBrowser/JobBrowser.sln @@ -1,8 +1,6 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 2013 -VisualStudioVersion = 12.0.30110.0 -MinimumVisualStudioVersion = 10.0.40219.1 +# Visual Studio 2012 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "JobBrowser", "JobBrowser\JobBrowser.csproj", "{EDDD2E0B-A52B-4E25-9436-B874017673FF}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Tools", "Tools\Tools.csproj", "{20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}" @@ -13,72 +11,26 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UsefulForms", "UsefulForms\ EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU - Debug|Mixed Platforms = Debug|Mixed Platforms Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|Any CPU = Release|Any CPU - Release|Mixed Platforms = Release|Mixed Platforms Release|x64 = Release|x64 - Release|x86 = Release|x86 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Debug|Any CPU.Build.0 = Debug|Any CPU 
- {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU - {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Debug|x64.ActiveCfg = Debug|x64 {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Debug|x64.Build.0 = Debug|x64 - {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Debug|x86.ActiveCfg = Debug|Any CPU - {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Release|Any CPU.ActiveCfg = Release|Any CPU - {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Release|Any CPU.Build.0 = Release|Any CPU - {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU - {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Release|Mixed Platforms.Build.0 = Release|Any CPU {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Release|x64.ActiveCfg = Release|x64 {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Release|x64.Build.0 = Release|x64 - {EDDD2E0B-A52B-4E25-9436-B874017673FF}.Release|x86.ActiveCfg = Release|Any CPU - {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Debug|Any CPU.Build.0 = Debug|Any CPU - {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU - {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Debug|x64.ActiveCfg = Debug|x64 {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Debug|x64.Build.0 = Debug|x64 - {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Debug|x86.ActiveCfg = Debug|Any CPU - {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Release|Any CPU.ActiveCfg = Release|Any CPU - {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Release|Any CPU.Build.0 = Release|Any CPU - {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU - {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Release|Mixed Platforms.Build.0 = Release|Any CPU {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Release|x64.ActiveCfg = Release|x64 
{20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Release|x64.Build.0 = Release|x64 - {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9}.Release|x86.ActiveCfg = Release|Any CPU - {77739535-7FAC-4487-887F-FEBA197E7572}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {77739535-7FAC-4487-887F-FEBA197E7572}.Debug|Any CPU.Build.0 = Debug|Any CPU - {77739535-7FAC-4487-887F-FEBA197E7572}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU - {77739535-7FAC-4487-887F-FEBA197E7572}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU {77739535-7FAC-4487-887F-FEBA197E7572}.Debug|x64.ActiveCfg = Debug|x64 {77739535-7FAC-4487-887F-FEBA197E7572}.Debug|x64.Build.0 = Debug|x64 - {77739535-7FAC-4487-887F-FEBA197E7572}.Debug|x86.ActiveCfg = Debug|Any CPU - {77739535-7FAC-4487-887F-FEBA197E7572}.Release|Any CPU.ActiveCfg = Release|Any CPU - {77739535-7FAC-4487-887F-FEBA197E7572}.Release|Any CPU.Build.0 = Release|Any CPU - {77739535-7FAC-4487-887F-FEBA197E7572}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU - {77739535-7FAC-4487-887F-FEBA197E7572}.Release|Mixed Platforms.Build.0 = Release|Any CPU {77739535-7FAC-4487-887F-FEBA197E7572}.Release|x64.ActiveCfg = Release|x64 {77739535-7FAC-4487-887F-FEBA197E7572}.Release|x64.Build.0 = Release|x64 - {77739535-7FAC-4487-887F-FEBA197E7572}.Release|x86.ActiveCfg = Release|Any CPU - {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Debug|Any CPU.Build.0 = Debug|Any CPU - {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU - {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Debug|x64.ActiveCfg = Debug|x64 {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Debug|x64.Build.0 = Debug|x64 - {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Debug|x86.ActiveCfg = Debug|Any CPU - {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Release|Any CPU.ActiveCfg = Release|Any CPU - {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Release|Any CPU.Build.0 = 
Release|Any CPU - {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU - {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Release|Mixed Platforms.Build.0 = Release|Any CPU {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Release|x64.ActiveCfg = Release|x64 {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Release|x64.Build.0 = Release|x64 - {27635A68-ADFC-4192-9262-B4E6ECDDCE09}.Release|x86.ActiveCfg = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/JobBrowser/JobBrowser/ClusterBrowser.cs b/JobBrowser/JobBrowser/ClusterBrowser.cs index b21c432..4e95ea6 100644 --- a/JobBrowser/JobBrowser/ClusterBrowser.cs +++ b/JobBrowser/JobBrowser/ClusterBrowser.cs @@ -28,11 +28,11 @@ using System.Linq; using System.Net; using System.Windows.Forms; using System.Diagnostics; -using Microsoft.Research.Calypso.JobObjectModel; -using Microsoft.Research.Calypso.Tools; -using Microsoft.Research.Calypso.UsefulForms; +using Microsoft.Research.JobObjectModel; +using Microsoft.Research.Tools; +using Microsoft.Research.UsefulForms; -namespace Microsoft.Research.Calypso.DryadAnalysis +namespace Microsoft.Research.DryadAnalysis { /// /// Class to browse jobs on cluster, copy, summarize and start visualization. @@ -73,7 +73,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis private TextWriterTraceListener LogFile; private BackgroundWorkQueue queue; - private BackgroundWorker queueWorker; /// /// Jobs from the cluster. 
@@ -92,8 +91,8 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.InitializeComponent(); this.status = new StatusWriter(this.statuslabel, this.statusStrip, this.Status); - this.queueWorker = new BackgroundWorker(); - this.queue = new BackgroundWorkQueue(this.queueWorker); + BackgroundWorker queueWorker = new BackgroundWorker(); + this.queue = new BackgroundWorkQueue(queueWorker, null, null); this.completeJobsList = new List(); this.refreshTimer = new Timer(); @@ -235,7 +234,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.toolStripMenuItem_job.Enabled = true; var item = new BackgroundWorkItem>( - (s, p) => BuildClusterJobList(s, p, this.clusterStatus, this.SelectedVirtualCluster), + m => BuildClusterJobList(m, this.clusterStatus, this.SelectedVirtualCluster), this.JobListRetrieved, "getJobs"); this.Queue(item); @@ -250,6 +249,8 @@ namespace Microsoft.Research.Calypso.DryadAnalysis private void JobListRetrieved(bool cancelled, List jobs) { + if (cancelled) return; + this.filteredDataGridView.DataGridView.ClearSelection(); this.completeJobsList = jobs; this.clusterJobs.SetItems(this.completeJobsList); @@ -274,12 +275,11 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// Talk to the web server and build the list of clustr jobs; used it to populate the upper panel. /// /// Virtual cluster selected; defined only for Scope clusters. - /// Reports progress. - /// Reports status. + /// Communication manager. /// Cluster to scan. 
- private static List BuildClusterJobList(StatusReporter reporter, Action progress, ClusterStatus status, string virtualCluster) + private static List BuildClusterJobList(CommManager manager, ClusterStatus status, string virtualCluster) { - return status.GetClusterJobList(virtualCluster, reporter, progress).ToList(); + return status.GetClusterJobList(virtualCluster, manager).ToList(); } /// @@ -311,7 +311,9 @@ namespace Microsoft.Research.Calypso.DryadAnalysis if (js == null) return; - DryadLinqJobInfo job = DryadLinqJobInfo.CreateDryadLinqJobInfo(this.clusterStatus.Config, js, false, this.Status, delegate {}); + // TODO: this should run in the background + CommManager manager = new CommManager(this.Status, delegate { }, new System.Threading.CancellationTokenSource().Token); + DryadLinqJobInfo job = DryadLinqJobInfo.CreateDryadLinqJobInfo(this.clusterStatus.Config, js, false, manager); if (job != null) { JobBrowser browser = new JobBrowser(job); @@ -366,7 +368,9 @@ namespace Microsoft.Research.Calypso.DryadAnalysis IEnumerable ti = this.SelectedJobs(); this.Status("Starting job browser...", StatusKind.LongOp); IEnumerable jobs = ti.Select(t => t.DiscoverDryadLinqJob(this.clusterStatus, this.Status)).ToList(); - IEnumerable detailed = jobs.Select(j => DryadLinqJobInfo.CreateDryadLinqJobInfo(this.clusterStatus.Config, j, false, this.Status, delegate { })); + + CommManager manager = new CommManager(this.Status, delegate { }, new System.Threading.CancellationTokenSource().Token); + IEnumerable detailed = jobs.Select(j => DryadLinqJobInfo.CreateDryadLinqJobInfo(this.clusterStatus.Config, j, false, manager)); foreach (DryadLinqJobInfo j in detailed) { if (j == null) continue; @@ -442,7 +446,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis IEnumerable jobs = todo.Select(j => j.DiscoverDryadLinqJob(this.clusterStatus, this.Status)).Where(j => j != null); var item = new BackgroundWorkItem( - (s, p) => ClusterWork.CancelJobs(jobs, this.clusterStatus, s, p), + m => 
ClusterWork.CancelJobs(jobs, this.clusterStatus, m), (c, b) => { }, "cancel"); this.Queue(item); @@ -504,8 +508,11 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.autoRefreshToolStripMenuItem.Checked = this.formSettings.AutoRefresh; this.AddClusterNameToMenu(""); + this.AddClusterNameToMenu(""); ClusterConfiguration.ReconstructKnownCluster(this.formSettings.KnownClusters); + + int found = 0; IEnumerable clusters = ClusterConfiguration.GetKnownClusterNames(); foreach (string c in clusters) { @@ -515,7 +522,12 @@ namespace Microsoft.Research.Calypso.DryadAnalysis { (config as CacheClusterConfiguration).StartCaching(); } + found++; } + + if (found == 0) + // try to find them by scanning + this.ScanClusters(); } /// @@ -539,6 +551,11 @@ namespace Microsoft.Research.Calypso.DryadAnalysis newItem.Click += this.AddNewCluster; return; } + if (clusterName == "") + { + newItem.Click += this.ScanClusters; + return; + } var selItem = newItem.DropDownItems.Add("Select"); var delItem = newItem.DropDownItems.Add("Delete"); @@ -548,6 +565,31 @@ namespace Microsoft.Research.Calypso.DryadAnalysis editItem.Click += editItem_Click; } + /// + /// Scan the clusters we are subscribed to and add them to the list of known clusters. + /// + /// Unused. + /// Unused. + private void ScanClusters(object sender, EventArgs e) + { + this.ScanClusters(); + } + + /// + /// Scan the clusters we are subscribed to and add them to the list of known clusters. + /// + private void ScanClusters() + { + this.Status("Scanning for known clusters", StatusKind.LongOp); + foreach (var conf in ClusterConfiguration.EnumerateSubscribedClusters()) + { + ClusterConfiguration.AddKnownCluster(conf); + this.AddClusterNameToMenu(conf.Name); + this.Status("Adding cluster " + conf.Name, StatusKind.OK); + } + this.Status("Scan completed", StatusKind.OK); + } + /// /// Edit a cluster. 
/// @@ -684,7 +726,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis IEnumerable jobs = todo.Select(j => j.DiscoverDryadLinqJob(this.clusterStatus, this.Status)).Where(j => j != null); var item = new BackgroundWorkItem>( - (s, p) => ClusterWork.DiagnoseJobs(jobs, this.clusterStatus.Config, s, p), + m => ClusterWork.DiagnoseJobs(jobs, this.clusterStatus.Config, m), DiagnosisResult.ShowDiagnosisResult, "cancel"); this.Queue(item); @@ -895,18 +937,18 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// /// Jobs to cancel. /// Cluster where the jobs are running. - /// Delegate used to report errors. /// True if all cancellations succeed. - /// Delegate used to report progress. + /// Communication manager. // ReSharper disable once UnusedParameter.Global - public static bool CancelJobs(IEnumerable jobs, ClusterStatus cluster, StatusReporter statusReporter, Action updateProgress) + public static bool CancelJobs(IEnumerable jobs, ClusterStatus cluster, CommManager manager) { bool done = true; foreach (DryadLinqJobSummary job in jobs) { + manager.Token.ThrowIfCancellationRequested(); if (job.Status != ClusterJobInformation.ClusterJobStatus.Running) { - statusReporter("Job " + job.Name + " does not appear to be running; will still try to cancel", StatusKind.Error); + manager.Status("Job " + job.Name + " does not appear to be running; will still try to cancel", StatusKind.Error); } bool success; @@ -923,9 +965,9 @@ namespace Microsoft.Research.Calypso.DryadAnalysis } if (success) - statusReporter("Job " + job.Name + " cancelled", StatusKind.OK); + manager.Status("Job " + job.Name + " cancelled", StatusKind.OK); else - statusReporter("Cancellation of " + job.Name + " failed " + reason, StatusKind.Error); + manager.Status("Cancellation of " + job.Name + " failed " + reason, StatusKind.Error); done &= success; } return done; @@ -936,9 +978,8 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// /// Jobs to diagnose. /// Cluster configuration. 
- /// Delegate used to report errors. - /// Delegate used to report progress. + /// Communication manager. + public static List DiagnoseJobs(IEnumerable jobs, ClusterConfiguration config, CommManager manager) { var dryadLinqJobSummaries = jobs as DryadLinqJobSummary[] ?? jobs.ToArray(); int jobCount = dryadLinqJobSummaries.Count(); @@ -949,15 +990,16 @@ namespace Microsoft.Research.Calypso.DryadAnalysis { if (summary == null) continue; - JobFailureDiagnosis diagnosis = JobFailureDiagnosis.CreateJobFailureDiagnosis(config, summary, reporter, updateProgress); - reporter("Diagnosing " + summary.ShortName(), StatusKind.LongOp); + manager.Token.ThrowIfCancellationRequested(); + JobFailureDiagnosis diagnosis = JobFailureDiagnosis.CreateJobFailureDiagnosis(config, summary, manager); + manager.Status("Diagnosing " + summary.ShortName(), StatusKind.LongOp); DiagnosisLog log = diagnosis.Diagnose(); result.Add(log); done++; - updateProgress(done * 100 / jobCount); + manager.Progress(done * 100 / jobCount); } - reporter("Diagnosis complete", StatusKind.OK); + manager.Status("Diagnosis complete", StatusKind.OK); return result; } } diff --git a/JobBrowser/JobBrowser/ClusterBrowser.designer.cs b/JobBrowser/JobBrowser/ClusterBrowser.designer.cs index e2b4681..7347464 100644 --- a/JobBrowser/JobBrowser/ClusterBrowser.designer.cs +++ b/JobBrowser/JobBrowser/ClusterBrowser.designer.cs @@ -18,9 +18,9 @@ See the Apache Version 2.0 License for specific language governing permissions a limitations under the License. 
*/ -using Microsoft.Research.Calypso.Tools; +using Microsoft.Research.Tools; -namespace Microsoft.Research.Calypso.DryadAnalysis +namespace Microsoft.Research.DryadAnalysis { partial class ClusterBrowser { @@ -56,10 +56,9 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.openInJobBrowserToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.diagnoseToolStripMenuItem1 = new System.Windows.Forms.ToolStripMenuItem(); this.terminateToolStripMenuItem1 = new System.Windows.Forms.ToolStripMenuItem(); + this.cancelToolStripMenuItem1 = new System.Windows.Forms.ToolStripMenuItem(); this.statusStrip = new System.Windows.Forms.StatusStrip(); this.statuslabel = new System.Windows.Forms.ToolStripStatusLabel(); - this.toolStripStatusLabel_backgroundWork = new System.Windows.Forms.ToolStripStatusLabel(); - this.toolStripStatusLabel_currentWork = new System.Windows.Forms.ToolStripStatusLabel(); this.toolStripProgressBar = new System.Windows.Forms.ToolStripProgressBar(); this.flowLayoutPanel_header = new System.Windows.Forms.FlowLayoutPanel(); this.label_vc = new System.Windows.Forms.Label(); @@ -78,7 +77,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.logFileToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.autoRefreshToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.clusterToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); - this.filteredDataGridView = new Microsoft.Research.Calypso.Tools.FilteredDataGridView(); + this.filteredDataGridView = new FilteredDataGridView(); this.contextMenuStrip_job.SuspendLayout(); this.statusStrip.SuspendLayout(); this.flowLayoutPanel_header.SuspendLayout(); @@ -92,7 +91,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.diagnoseToolStripMenuItem1, this.terminateToolStripMenuItem1}); this.contextMenuStrip_job.Name = "contextMenuStrip_job"; - this.contextMenuStrip_job.Size = new System.Drawing.Size(182, 92); + this.contextMenuStrip_job.Size = new 
System.Drawing.Size(182, 70); // // openInJobBrowserToolStripMenuItem // @@ -114,15 +113,19 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.terminateToolStripMenuItem1.Name = "terminateToolStripMenuItem1"; this.terminateToolStripMenuItem1.Size = new System.Drawing.Size(181, 22); this.terminateToolStripMenuItem1.Text = "Terminate"; - this.terminateToolStripMenuItem1.Visible = false; this.terminateToolStripMenuItem1.Click += new System.EventHandler(this.terminateToolStripMenuItem_Click); // + // cancelToolStripMenuItem1 + // + this.cancelToolStripMenuItem1.Name = "cancelToolStripMenuItem1"; + this.cancelToolStripMenuItem1.Size = new System.Drawing.Size(186, 22); + this.cancelToolStripMenuItem1.Text = "Cancel current work"; + this.cancelToolStripMenuItem1.Click += new System.EventHandler(this.cancelToolStripMenuItem_Click); + // // statusStrip // this.statusStrip.Items.AddRange(new System.Windows.Forms.ToolStripItem[] { this.statuslabel, - this.toolStripStatusLabel_backgroundWork, - this.toolStripStatusLabel_currentWork, this.toolStripProgressBar}); this.statusStrip.Location = new System.Drawing.Point(0, 431); this.statusStrip.Name = "statusStrip"; @@ -134,32 +137,16 @@ namespace Microsoft.Research.Calypso.DryadAnalysis // this.statuslabel.Name = "statuslabel"; this.statuslabel.Overflow = System.Windows.Forms.ToolStripItemOverflow.Never; - this.statuslabel.Size = new System.Drawing.Size(833, 17); + this.statuslabel.Size = new System.Drawing.Size(1026, 17); this.statuslabel.Spring = true; this.statuslabel.Text = "Status displayed here"; this.statuslabel.TextAlign = System.Drawing.ContentAlignment.MiddleLeft; // - // toolStripStatusLabel_backgroundWork - // - this.toolStripStatusLabel_backgroundWork.BorderStyle = System.Windows.Forms.Border3DStyle.SunkenOuter; - this.toolStripStatusLabel_backgroundWork.Name = "toolStripStatusLabel_backgroundWork"; - this.toolStripStatusLabel_backgroundWork.Overflow = System.Windows.Forms.ToolStripItemOverflow.Never; - 
this.toolStripStatusLabel_backgroundWork.Size = new System.Drawing.Size(109, 17); - this.toolStripStatusLabel_backgroundWork.Text = "0 pending activities"; - // - // toolStripStatusLabel_currentWork - // - this.toolStripStatusLabel_currentWork.BorderStyle = System.Windows.Forms.Border3DStyle.SunkenOuter; - this.toolStripStatusLabel_currentWork.Name = "toolStripStatusLabel_currentWork"; - this.toolStripStatusLabel_currentWork.Overflow = System.Windows.Forms.ToolStripItemOverflow.Never; - this.toolStripStatusLabel_currentWork.Size = new System.Drawing.Size(84, 17); - this.toolStripStatusLabel_currentWork.Text = "Doing nothing"; - // // toolStripProgressBar // this.toolStripProgressBar.Name = "toolStripProgressBar"; this.toolStripProgressBar.Size = new System.Drawing.Size(100, 16); - // + // // flowLayoutPanel_header // this.flowLayoutPanel_header.AutoSize = true; @@ -219,6 +206,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis // jobToolStripMenuItem_file // this.jobToolStripMenuItem_file.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] { + this.cancelToolStripMenuItem1, this.newWindowToolStripMenuItem, this.refreshToolStripMenuItem1, this.exitToolStripMenuItem1}); @@ -264,7 +252,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.toolStripMenuItem_job.Size = new System.Drawing.Size(37, 20); this.toolStripMenuItem_job.Text = "&Job"; this.toolStripMenuItem_job.ToolTipText = "View job informaotion in detail."; - this.toolStripMenuItem_job.Visible = false; // // jobBrowserToolStripMenuItem // @@ -279,6 +266,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.diagnoseToolStripMenuItem.Size = new System.Drawing.Size(163, 22); this.diagnoseToolStripMenuItem.Text = "Diagnose"; this.diagnoseToolStripMenuItem.ToolTipText = "Attempt to diagnose job failures."; + this.diagnoseToolStripMenuItem.Visible = false; this.diagnoseToolStripMenuItem.Click += new System.EventHandler(this.diagnoseToolStripMenuItem_Click); // // 
terminateToolStripMenuItem @@ -295,6 +283,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.openFromURLToolStripMenuItem.Size = new System.Drawing.Size(163, 22); this.openFromURLToolStripMenuItem.Text = "Open job URL..."; this.openFromURLToolStripMenuItem.ToolTipText = "Open the job given a URL."; + this.openFromURLToolStripMenuItem.Visible = false; this.openFromURLToolStripMenuItem.Click += new System.EventHandler(this.openFromURLToolStripMenuItem_Click); // // settingsToolStripMenuItem @@ -309,7 +298,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis // logFileToolStripMenuItem // this.logFileToolStripMenuItem.Name = "logFileToolStripMenuItem"; - this.logFileToolStripMenuItem.Size = new System.Drawing.Size(139, 22); + this.logFileToolStripMenuItem.Size = new System.Drawing.Size(152, 22); this.logFileToolStripMenuItem.Text = "Log file"; this.logFileToolStripMenuItem.ToolTipText = "When enabled logs errors in the selected file."; this.logFileToolStripMenuItem.Click += new System.EventHandler(this.logFileToolStripMenuItem_Click); @@ -317,7 +306,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis // autoRefreshToolStripMenuItem // this.autoRefreshToolStripMenuItem.Name = "autoRefreshToolStripMenuItem"; - this.autoRefreshToolStripMenuItem.Size = new System.Drawing.Size(139, 22); + this.autoRefreshToolStripMenuItem.Size = new System.Drawing.Size(152, 22); this.autoRefreshToolStripMenuItem.Text = "Auto refresh"; this.autoRefreshToolStripMenuItem.Click += new System.EventHandler(this.autoRefreshToolStripMenuItem_Click); // @@ -326,15 +315,15 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.clusterToolStripMenuItem.Name = "clusterToolStripMenuItem"; this.clusterToolStripMenuItem.Size = new System.Drawing.Size(56, 20); this.clusterToolStripMenuItem.Text = "Cluster"; - + // // filteredDataGridView // this.filteredDataGridView.BorderStyle = System.Windows.Forms.BorderStyle.FixedSingle; this.filteredDataGridView.ContextMenuStrip = 
this.contextMenuStrip_job; this.filteredDataGridView.Dock = System.Windows.Forms.DockStyle.Fill; - this.filteredDataGridView.Location = new System.Drawing.Point(0, 56); + this.filteredDataGridView.Location = new System.Drawing.Point(0, 24); this.filteredDataGridView.Name = "filteredDataGridView"; - this.filteredDataGridView.Size = new System.Drawing.Size(1143, 375); + this.filteredDataGridView.Size = new System.Drawing.Size(1143, 429); this.filteredDataGridView.TabIndex = 15; this.filteredDataGridView.CellFormatting += new System.Windows.Forms.DataGridViewCellFormattingEventHandler(this.filteredDataGridView_CellFormatting); this.filteredDataGridView.CellMouseDoubleClick += new System.Windows.Forms.DataGridViewCellMouseEventHandler(this.filteredDataGridView_CellMouseDoubleClick); @@ -365,6 +354,11 @@ namespace Microsoft.Research.Calypso.DryadAnalysis } + private void cancelToolStripMenuItem_Click(object sender, System.EventArgs e) + { + this.queue.CancelCurrentWork(); + } + #endregion private System.Windows.Forms.StatusStrip statusStrip; @@ -378,13 +372,12 @@ namespace Microsoft.Research.Calypso.DryadAnalysis private System.Windows.Forms.ToolStripMenuItem refreshToolStripMenuItem1; private System.Windows.Forms.ToolStripMenuItem openFromURLToolStripMenuItem; private System.Windows.Forms.ToolStripMenuItem terminateToolStripMenuItem; - private System.Windows.Forms.ToolStripStatusLabel toolStripStatusLabel_backgroundWork; - private System.Windows.Forms.ToolStripStatusLabel toolStripStatusLabel_currentWork; private System.Windows.Forms.ToolStripMenuItem diagnoseToolStripMenuItem; private System.Windows.Forms.ContextMenuStrip contextMenuStrip_job; private System.Windows.Forms.ToolStripMenuItem openInJobBrowserToolStripMenuItem; private System.Windows.Forms.ToolStripMenuItem diagnoseToolStripMenuItem1; private System.Windows.Forms.ToolStripMenuItem terminateToolStripMenuItem1; + private System.Windows.Forms.ToolStripMenuItem cancelToolStripMenuItem1; private 
System.Windows.Forms.ToolStripProgressBar toolStripProgressBar; private FilteredDataGridView filteredDataGridView; private System.Windows.Forms.ToolStripMenuItem newWindowToolStripMenuItem; diff --git a/JobBrowser/JobBrowser/ClusterBrowser.resx b/JobBrowser/JobBrowser/ClusterBrowser.resx index a3018f8..9ee9554 100644 --- a/JobBrowser/JobBrowser/ClusterBrowser.resx +++ b/JobBrowser/JobBrowser/ClusterBrowser.resx @@ -123,9 +123,6 @@ 17, 17 - - 253, 17 - 419, 16 diff --git a/JobBrowser/JobBrowser/ClusterConfigEditor.Designer.cs b/JobBrowser/JobBrowser/ClusterConfigEditor.Designer.cs index ac163ec..c05883e 100644 --- a/JobBrowser/JobBrowser/ClusterConfigEditor.Designer.cs +++ b/JobBrowser/JobBrowser/ClusterConfigEditor.Designer.cs @@ -18,7 +18,7 @@ See the Apache Version 2.0 License for specific language governing permissions a limitations under the License. */ -namespace Microsoft.Research.Calypso.JobObjectModel +namespace Microsoft.Research.JobObjectModel { partial class ClusterConfigEditor { diff --git a/JobBrowser/JobBrowser/ClusterConfigEditor.cs b/JobBrowser/JobBrowser/ClusterConfigEditor.cs index 7b50b7b..321daac 100644 --- a/JobBrowser/JobBrowser/ClusterConfigEditor.cs +++ b/JobBrowser/JobBrowser/ClusterConfigEditor.cs @@ -23,9 +23,9 @@ using System; using System.Collections.Generic; using System.Drawing; using System.Windows.Forms; -using Microsoft.Research.Calypso.Tools; +using Microsoft.Research.Tools; -namespace Microsoft.Research.Calypso.JobObjectModel +namespace Microsoft.Research.JobObjectModel { /// /// Editor for cluster configuration. diff --git a/JobBrowser/JobBrowser/Diagnosis.cs b/JobBrowser/JobBrowser/Diagnosis.cs index 4a47117..23d7b86 100644 --- a/JobBrowser/JobBrowser/Diagnosis.cs +++ b/JobBrowser/JobBrowser/Diagnosis.cs @@ -19,15 +19,15 @@ limitations under the License. 
*/ -using Microsoft.Research.Calypso.JobObjectModel; -using Microsoft.Research.Calypso.Tools; using System; using System.IO; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; +using Microsoft.Research.JobObjectModel; +using Microsoft.Research.Tools; -namespace Microsoft.Research.Calypso.DryadAnalysis +namespace Microsoft.Research.DryadAnalysis { /// /// The result of a decision (ternary booleans?) @@ -236,30 +236,24 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// public DryadLinqJobInfo Job { get; protected set; } /// - /// Delegate used to report erorrs. + /// Communication manager. /// - public StatusReporter Reporter { get; protected set; } + public CommManager Manager { get; protected set; } /// /// Plan of the job. /// public DryadJobStaticPlan StaticPlan { get; protected set; } /// - /// Delegate used to report progress. - /// - public Action ProgressReporter { get; protected set; } - /// /// Create a FailureDiagnosis object. /// /// Job being diagnosed. /// Static plan of the job. - /// Delegate used to report errors. - /// Delegate used to report progress. - protected FailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, StatusReporter reporter, Action progressReporter) + /// Communication manager. + protected FailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager) { this.Job = job; this.StaticPlan = plan; - this.Reporter = reporter; - this.ProgressReporter = progressReporter; + this.Manager = manager; this.Summary = job.Summary; this.cluster = job.ClusterConfiguration; } @@ -267,19 +261,18 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// /// Try to find the job information from cluster and summary. /// - /// Delegate used to report status. - /// Delegate used to report progress. - protected void FindJobInfo(StatusReporter status, Action progress) + /// Communication manager. 
+ protected void FindJobInfo(CommManager manager) { - DryadLinqJobInfo jobinfo = DryadLinqJobInfo.CreateDryadLinqJobInfo(this.cluster, this.Summary, true, status, progress); + DryadLinqJobInfo jobinfo = DryadLinqJobInfo.CreateDryadLinqJobInfo(this.cluster, this.Summary, true, manager); if (jobinfo == null) { - status("Cannot collect information for " + Summary.ShortName() + " to diagnose", StatusKind.Error); + manager.Status("Cannot collect information for " + Summary.ShortName() + " to diagnose", StatusKind.Error); return; } this.Job = jobinfo; - this.StaticPlan = JobObjectModel.DryadJobStaticPlan.CreatePlan(jobinfo, status); + this.StaticPlan = JobObjectModel.DryadJobStaticPlan.CreatePlan(jobinfo, manager); } /// @@ -287,15 +280,13 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// /// Cluster where job resides. /// Job summary. - /// Delegate used to report errors. - /// Delegate used to report progress. - protected FailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, StatusReporter reporter, Action progressReporter) + /// Communication manager. + protected FailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, CommManager manager) { this.cluster = config; this.Summary = summary; - this.Reporter = reporter; - this.ProgressReporter = progressReporter; - this.FindJobInfo(reporter, progressReporter); + this.Manager = manager; + this.FindJobInfo(manager); } /// @@ -327,11 +318,10 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// /// Vertex to diagnose. /// Job containing the vertex. - /// Delegate used to report progress. - /// Delegate used to report status. /// Plan of the executed job. - protected VertexFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, ExecutedVertexInstance vertex, StatusReporter reporter, Action progressReporter) - : base(job, plan, reporter, progressReporter) + /// Communication manager. 
+ protected VertexFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, ExecutedVertexInstance vertex, CommManager manager) + : base(job, plan, manager) { this.Job = job; this.Vertex = vertex; @@ -344,15 +334,13 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// /// Vertex to diagnose. /// Job containing the vertex. - /// Delegate used to report progress. - /// Delegate used to report status. + /// Communication manager. /// A subclass of VertexFailureDiagnosis. /// Plan of the executed job. public static VertexFailureDiagnosis CreateVertexFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, - ExecutedVertexInstance vertex, - StatusReporter reporter, - Action progressReporter) + ExecutedVertexInstance vertex, + CommManager manager) { ClusterConfiguration config = job.ClusterConfiguration; if (config is CacheClusterConfiguration) @@ -381,7 +369,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis DiagnosisLog log = new DiagnosisLog(this.Job, this.Summary); log.AddMessage(new DiagnosisMessage(DiagnosisMessage.Importance.Final, "Diagnostic for " + this.VertexName, "Vertex state is " + this.Vertex.State)); this.Diagnose(log); - this.Reporter("Vertex diagnosis complete", StatusKind.OK); + this.Manager.Status("Vertex diagnosis complete", StatusKind.OK); return log; } @@ -409,7 +397,8 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// Detect whether the vertex had problems reading a particular channel. /// /// The channel that cannot be read, or null if that's not the problem. - public virtual ChannelEndpointDescription ChannelReadFailure() + /// Communication manager. 
+ public virtual ChannelEndpointDescription ChannelReadFailure(CommManager manager) { List stack = this.StackTrace().ToList(); if (stack.Count == 0) @@ -424,7 +413,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis bool success = int.TryParse(m.Groups[3].Value, out channelNo); if (!success) return null; - this.Vertex.DiscoverChannels(true, false, true, this.Reporter, null); + this.Vertex.DiscoverChannels(true, false, true, manager); var channels = this.Vertex.InputChannels; if (channels == null) return null; @@ -604,11 +593,10 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// Create a class to diagnose the problems of a job. /// /// Job to diagnose. - /// Delegate used to report progress. - /// Delegate used to report status. /// Plan of the diagnosed job. - protected JobFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, StatusReporter status, Action progress) - : base(job, plan, status, progress) + /// Communication manager. + protected JobFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager) + : base(job, plan, manager) { this.diagnosisLog = new DiagnosisLog(job, job.Summary); this.jobManager = this.Job.ManagerVertex; @@ -617,12 +605,11 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// /// Create a class to diagnose the problems of a job. /// - /// Delegate used to report progress. - /// Delegate used to report status. /// Cluster where job resides. + /// Communication manager. /// Job summary. 
- protected JobFailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, StatusReporter status, Action progress) - : base(config, summary, status, progress) + protected JobFailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, CommManager manager) + : base(config, summary, manager) { this.diagnosisLog = new DiagnosisLog(this.Job, summary); if (this.Job != null) @@ -825,11 +812,10 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// Create a suitable Job Failure diagnosis object for the job being analyzed. /// /// Job to diagnose. - /// Delegate used to report errors. - /// Delegate used to report progress. + /// Communication manager. /// A subclass of JobFailureDiagnosis with the type appropriate for the job. /// Plan of the job being diagnosed. - public static JobFailureDiagnosis CreateJobFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, StatusReporter status, Action progress) + public static JobFailureDiagnosis CreateJobFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager) { ClusterConfiguration config = job.ClusterConfiguration; if (config is CacheClusterConfiguration) @@ -843,11 +829,10 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// Create a suitable Job Failure diagnosis object for the job being analyzed. /// /// Job to diagnose. - /// Delegate used to report errors. - /// Delegate used to report progress. /// Cluster where job resides. + /// Communication manager. /// A subclass of JobFailureDiagnosis with the type appropriate for the job. 
- public static JobFailureDiagnosis CreateJobFailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, StatusReporter status, Action progress) + public static JobFailureDiagnosis CreateJobFailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, CommManager manager) { if (config is CacheClusterConfiguration) config = (config as CacheClusterConfiguration).ActualConfig(summary); @@ -860,7 +845,8 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// This is incomplete: e.g., it does not work for tidyfs streams. /// /// Yes if there were correlated failures. - protected Decision LookForCorrelatedReadFailures() + /// Communication manager. + protected Decision LookForCorrelatedReadFailures(CommManager manager) { // if we have more than this many failures we start to worry const int maxFailures = 5; @@ -876,13 +862,13 @@ namespace Microsoft.Research.Calypso.DryadAnalysis int verticesDone = 0; foreach (ExecutedVertexInstance v in failures) { - var crf = VertexFailureDiagnosis.CreateVertexFailureDiagnosis(this.Job, this.StaticPlan, v, null, null).ChannelReadFailure(); + var crf = VertexFailureDiagnosis.CreateVertexFailureDiagnosis(this.Job, this.StaticPlan, v, manager).ChannelReadFailure(manager); if (crf != null) { channelsFailed.Add(crf); } verticesDone++; - this.ProgressReporter(verticesDone * 100 / totalFailures); + manager.Progress(verticesDone * 100 / totalFailures); } if (channelsFailed.Count() < maxFailures) return Decision.No; diff --git a/JobBrowser/JobBrowser/DiagnosisResult.Designer.cs b/JobBrowser/JobBrowser/DiagnosisResult.Designer.cs index 09ce3b2..c7de24a 100644 --- a/JobBrowser/JobBrowser/DiagnosisResult.Designer.cs +++ b/JobBrowser/JobBrowser/DiagnosisResult.Designer.cs @@ -18,7 +18,7 @@ See the Apache Version 2.0 License for specific language governing permissions a limitations under the License. 
*/ -namespace Microsoft.Research.Calypso.DryadAnalysis +namespace Microsoft.Research.DryadAnalysis { using System; diff --git a/JobBrowser/JobBrowser/DiagnosisResult.cs b/JobBrowser/JobBrowser/DiagnosisResult.cs index 5f813eb..6f0650f 100644 --- a/JobBrowser/JobBrowser/DiagnosisResult.cs +++ b/JobBrowser/JobBrowser/DiagnosisResult.cs @@ -20,11 +20,11 @@ limitations under the License. */ using System.Collections.Generic; -using Microsoft.Research.Calypso.JobObjectModel; using System; using System.Windows.Forms; +using Microsoft.Research.JobObjectModel; -namespace Microsoft.Research.Calypso.DryadAnalysis +namespace Microsoft.Research.DryadAnalysis { /// /// Display the diagnosis results. diff --git a/JobBrowser/JobBrowser/DryadJobMain.cs b/JobBrowser/JobBrowser/DryadJobMain.cs index dfd16e2..9b307ce 100644 --- a/JobBrowser/JobBrowser/DryadJobMain.cs +++ b/JobBrowser/JobBrowser/DryadJobMain.cs @@ -23,7 +23,7 @@ using System; using System.Diagnostics; using System.Windows.Forms; -namespace Microsoft.Research.Calypso.DryadAnalysis +namespace Microsoft.Research.DryadAnalysis { static class Program { diff --git a/JobBrowser/JobBrowser/JobBrowser.Designer.cs b/JobBrowser/JobBrowser/JobBrowser.Designer.cs index be4a8f4..bc179ba 100644 --- a/JobBrowser/JobBrowser/JobBrowser.Designer.cs +++ b/JobBrowser/JobBrowser/JobBrowser.Designer.cs @@ -18,7 +18,7 @@ See the Apache Version 2.0 License for specific language governing permissions a limitations under the License. 
*/ -namespace Microsoft.Research.Calypso.DryadAnalysis +namespace Microsoft.Research.DryadAnalysis { partial class JobBrowser { @@ -65,13 +65,13 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.menu = new System.Windows.Forms.MenuStrip(); this.jobToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.refreshToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); - this.collectDataToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.hideCancelledVerticesToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.exportToCSVToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.toolStripMenuItem_terminate = new System.Windows.Forms.ToolStripMenuItem(); this.packageCachedFilesToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.diagnoseToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.closeToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.cancelCurrentWorkToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.stageToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.cacheLogsForAllVerticesToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.vertexToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); @@ -80,7 +80,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.profileLocallyCPUSamplingToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.profileLocallyMemorySamplingToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.jMStdoutMentionsToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); - this.jMLogsMentionsToolStripMenuItem1 = new System.Windows.Forms.ToolStripMenuItem(); this.diagnoseToolStripMenuItem1 = new System.Windows.Forms.ToolStripMenuItem(); this.viewToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.loadFileInEditorToolStripMenuItem = new 
System.Windows.Forms.ToolStripMenuItem(); @@ -124,7 +123,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.menuItem_stageVertexProfileLocallyCPUSampling = new System.Windows.Forms.ToolStripMenuItem(); this.menuItem_stageVertexProfileLocallyMemorySampling = new System.Windows.Forms.ToolStripMenuItem(); this.jMStdoutLinesToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); - this.jMLogsMentionsToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.diagnoseToolStripMenuItem2 = new System.Windows.Forms.ToolStripMenuItem(); this.splitContainer_vertexData = new System.Windows.Forms.SplitContainer(); this.dataGridView_vertexHeader = new System.Windows.Forms.DataGridView(); @@ -147,7 +145,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.toolStripStatusLabel_currentWork = new System.Windows.Forms.ToolStripStatusLabel(); this.toolStripStatusLabel_backgroundWork = new System.Windows.Forms.ToolStripStatusLabel(); this.toolStripProgressBar = new System.Windows.Forms.ToolStripProgressBar(); - this.backgroundWorker = new System.ComponentModel.BackgroundWorker(); this.menu.SuspendLayout(); ((System.ComponentModel.ISupportInitialize)(this.splitContainer_jobAndRest)).BeginInit(); this.splitContainer_jobAndRest.Panel1.SuspendLayout(); @@ -204,13 +201,13 @@ namespace Microsoft.Research.Calypso.DryadAnalysis // this.jobToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] { this.refreshToolStripMenuItem, - this.collectDataToolStripMenuItem, this.hideCancelledVerticesToolStripMenuItem, this.exportToCSVToolStripMenuItem, this.toolStripMenuItem_terminate, this.packageCachedFilesToolStripMenuItem, this.diagnoseToolStripMenuItem, - this.closeToolStripMenuItem}); + this.closeToolStripMenuItem, + this.cancelCurrentWorkToolStripMenuItem}); this.jobToolStripMenuItem.Name = "jobToolStripMenuItem"; this.jobToolStripMenuItem.Size = new System.Drawing.Size(37, 20); this.jobToolStripMenuItem.Text = "Job"; @@ -248,7 
+245,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.toolStripMenuItem_terminate.Size = new System.Drawing.Size(195, 22); this.toolStripMenuItem_terminate.Text = "Terminate job"; this.toolStripMenuItem_terminate.ToolTipText = "Requests the cluster to terminate the job execution."; - this.toolStripMenuItem_terminate.Visible = false; this.toolStripMenuItem_terminate.Click += new System.EventHandler(this.toolStripMenuItem_terminate_Click); // // packageCachedFilesToolStripMenuItem @@ -279,6 +275,13 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.closeToolStripMenuItem.ToolTipText = "Save the settings and close the window."; this.closeToolStripMenuItem.Click += new System.EventHandler(this.closeToolStripMenuItem_Click); // + // cancelCurrentWorkToolStripMenuItem + // + this.cancelCurrentWorkToolStripMenuItem.Name = "cancelCurrentWorkToolStripMenuItem"; + this.cancelCurrentWorkToolStripMenuItem.Size = new System.Drawing.Size(195, 22); + this.cancelCurrentWorkToolStripMenuItem.Text = "Cancel current work"; + this.cancelCurrentWorkToolStripMenuItem.Click += new System.EventHandler(this.cancelCurrentWorkToolStripMenuItem_Click); + // // stageToolStripMenuItem // this.stageToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] { @@ -304,7 +307,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.profileLocallyCPUSamplingToolStripMenuItem, this.profileLocallyMemorySamplingToolStripMenuItem, this.jMStdoutMentionsToolStripMenuItem, - this.jMLogsMentionsToolStripMenuItem1, this.diagnoseToolStripMenuItem1}); this.vertexToolStripMenuItem.Enabled = false; this.vertexToolStripMenuItem.Name = "vertexToolStripMenuItem"; @@ -908,10 +910,9 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.menuItem_stageVertexProfileLocallyCPUSampling, this.menuItem_stageVertexProfileLocallyMemorySampling, this.jMStdoutLinesToolStripMenuItem, - this.jMLogsMentionsToolStripMenuItem, this.diagnoseToolStripMenuItem2}); 
this.contextMenu_stageVertex.Name = "vertexContextMenuStrip"; - this.contextMenu_stageVertex.Size = new System.Drawing.Size(256, 158); + this.contextMenu_stageVertex.Size = new System.Drawing.Size(256, 136); // // menuItem_stageVertexLocalDebugManaged // @@ -1252,11 +1253,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.toolStripProgressBar.Name = "toolStripProgressBar"; this.toolStripProgressBar.Size = new System.Drawing.Size(100, 16); // - // backgroundWorker - // - this.backgroundWorker.DoWork += new System.ComponentModel.DoWorkEventHandler(this.backgroundWorker_DoWork); - this.backgroundWorker.RunWorkerCompleted += new System.ComponentModel.RunWorkerCompletedEventHandler(this.backgroundWorker_RunWorkerCompleted); - // // JobBrowser // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); @@ -1352,7 +1348,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis private System.Windows.Forms.TextBox textBox_stageCode; private System.Windows.Forms.FlowLayoutPanel flowLayoutPanel3; private System.Windows.Forms.FlowLayoutPanel flowLayoutPanel4; - private System.Windows.Forms.ToolStripMenuItem collectDataToolStripMenuItem; private System.Windows.Forms.FlowLayoutPanel flowLayoutPanel5; private System.Windows.Forms.Label label1; private System.Windows.Forms.TextBox textBox_find; @@ -1362,7 +1357,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis private System.Windows.Forms.Label label_matches; private System.Windows.Forms.CheckBox checkBox_refresh; private System.Windows.Forms.Button button_filter; - private System.ComponentModel.BackgroundWorker backgroundWorker; private System.Windows.Forms.ComboBox comboBox_vertexInformation; private System.Windows.Forms.Label label_comboVertex; private System.Windows.Forms.ToolStripMenuItem viewToolStripMenuItem; @@ -1380,9 +1374,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis private System.Windows.Forms.ToolStripMenuItem menuItem_stageVertexLocalDebugUnmanaged; private 
System.Windows.Forms.ToolStripMenuItem debugLocallyUnmanagedToolStripMenuItem; private System.Windows.Forms.ToolStripMenuItem jMStdoutMentionsToolStripMenuItem; - private System.Windows.Forms.ToolStripMenuItem jMLogsMentionsToolStripMenuItem1; private System.Windows.Forms.ToolStripMenuItem jMStdoutLinesToolStripMenuItem; - private System.Windows.Forms.ToolStripMenuItem jMLogsMentionsToolStripMenuItem; private System.Windows.Forms.TextBox textBox_stageFilter; private System.Windows.Forms.Button button_stageFilter; private System.Windows.Forms.Button button_clearStageFilter; @@ -1412,5 +1404,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis private System.Windows.Forms.ToolStripMenuItem cacheAllLogsToolStripMenuItem; private System.Windows.Forms.ToolStripMenuItem exportToCSVToolStripMenuItem; private System.Windows.Forms.ToolStripMenuItem cacheLogsForAllVerticesToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem cancelCurrentWorkToolStripMenuItem; } } diff --git a/JobBrowser/JobBrowser/JobBrowser.cs b/JobBrowser/JobBrowser/JobBrowser.cs index 4767cd4..5014ab7 100644 --- a/JobBrowser/JobBrowser/JobBrowser.cs +++ b/JobBrowser/JobBrowser/JobBrowser.cs @@ -34,10 +34,10 @@ using System.Threading; using System.Windows.Forms; using Microsoft.Msagl.GraphViewerGdi; using Microsoft.Msagl.Splines; -using Microsoft.Research.Calypso.JobObjectModel; -using Microsoft.Research.Calypso.Tools; +using Microsoft.Research.JobObjectModel; +using Microsoft.Research.Tools; -namespace Microsoft.Research.Calypso.DryadAnalysis +namespace Microsoft.Research.DryadAnalysis { /// /// A form to display information about a DryadLinq job. @@ -66,40 +66,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// private BackgroundWorkQueue queue; - /// - /// Activity to perform by the backgroundWorker. 
- /// - class BackgroundWorkInfo - { - public enum WorkKind - { - LoadJobInfo, - }; - - public WorkKind Work; // kind of operation to perform in the background - public bool Success; // if true the work succeeded - public DateTime workStartTime; // when work is started - - public override string ToString() - { - return this.Work.ToString(); - } - - /// - /// Add yourself to the list of pending work. - /// - /// List of pending work. - internal void AddTo(List list) - { - list.Add(this); - } - } - - /// - /// List of work activities to perform. - /// - readonly List pendingWork; - + // window regions starting from left-top in order going down #region JOB_HEADER /// @@ -352,10 +319,9 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.InitializeComponent(); this.queueWorker = new BackgroundWorker(); - this.queue = new BackgroundWorkQueue(this.queueWorker); + this.queue = new BackgroundWorkQueue(this.queueWorker, this.toolStripStatusLabel_currentWork, this.toolStripStatusLabel_backgroundWork); this.WarnedAboutDebugging = false; - this.pendingWork = new List(); this.status = new StatusWriter(this.toolStripStatusLabel, this.statusStrip, this.Status); this.refreshTimer = new System.Windows.Forms.Timer(); @@ -558,16 +524,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.RefreshDisplay(); } - /// - /// Load information about the current job. - /// - private void LoadJobDetails() - { - BackgroundWorkInfo work = new BackgroundWorkInfo(); - work.Work = BackgroundWorkInfo.WorkKind.LoadJobInfo; - this.StartBackgroundWork(work); - } - /// /// Loading the job information has completed. /// Time it took to load the job information. 
@@ -857,7 +813,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis case ExecutedVertexInstance.VertexState.Failed: return Color.Tomato; default: - throw new CalypsoDryadException("Unexpected vertex state " + state); + throw new DryadException("Unexpected vertex state " + state); } } @@ -986,22 +942,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis yield return new Tuple(unknown, VertexStateColor(ExecutedVertexInstance.VertexState.Unknown)); } - private static DryadJobStaticPlan CreatePlan(DryadLinqJobInfo job, StatusReporter status) - { - status("Constructing static plan", StatusKind.LongOp); - try - { - var result = JobObjectModel.DryadJobStaticPlan.CreatePlan(job, status); - return result; - } - catch (Exception ex) - { - status("Exception during building of static plan: " + ex.Message, StatusKind.Error); - Trace.TraceInformation(ex.ToString()); - return null; - } - } - /// /// Refresh and redisplay the query plan. /// @@ -1010,7 +950,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.richTextBox_file.Text = ""; var item = new BackgroundWorkItem( - (s, p) => JobObjectModel.DryadJobStaticPlan.CreatePlan(this.Job, this.Status), + m => JobObjectModel.DryadJobStaticPlan.CreatePlan(this.Job, m), this.PlanComputed, "refresh plan"); this.Queue(item); @@ -1455,7 +1395,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.EnableStageFiltering(true); this.stageHeaderData.RaiseListChangedEvents = false; this.currentStage = stage; - this.currentTable = null; + this.currentTable = null; // stageData is populated by the selectionChanged event handler for the stageHeader if (this.ShowingStageOrTable != KindOfStageShown.Stage) @@ -1677,10 +1617,9 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// /// Cluster object whose contents is read. /// Pattern to filter contents, for folders. - /// Used to report status. /// The file contents. - /// Progress reporter. 
- private static FileContents GetContents(StatusReporter status, Action progress, IClusterResidentObject path, string pattern) + /// Communication manager. + private static FileContents GetContents(CommManager manager, IClusterResidentObject path, string pattern) { if (path == null) { @@ -1705,6 +1644,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis int displayed = 0; foreach (IClusterResidentObject d in dirs) { + manager.Token.ThrowIfCancellationRequested(); if (d.Exception != null) { error += " [Error " + d.Exception.Message + "]"; @@ -1730,7 +1670,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis } else { - status("Extracting contents of " + path, StatusKind.LongOp); + manager.Status("Extracting contents of " + path, StatusKind.LongOp); ISharedStreamReader sr = path.GetStream(); if (sr.Exception != null) { @@ -1741,7 +1681,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis { if (path.Size == 0) error += "[empty]"; - var contents = sr.ReadToEnd(); + var contents = sr.ReadToEnd(manager.Token); return new FileContents(contents, error, linkCache); } } @@ -1756,7 +1696,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis private void DisplayContents1(IClusterResidentObject path, string pattern) { var item = new BackgroundWorkItem( - (s, p) => GetContents(s, p, path, pattern), + m => GetContents(m, path, pattern), this.ShowContents, "Read file"); this.Queue(item); @@ -1892,7 +1832,9 @@ namespace Microsoft.Research.Calypso.DryadAnalysis { this.label_title.Text = "Inputs"; this.Status("Discovering vertex channel information", StatusKind.LongOp); - bool found = this.currentVertex.DiscoverChannels(true, false, false, this.Status, this.UpdateProgress); + // TODO: this should run in the background + CommManager manager = new CommManager(this.Status, this.UpdateProgress, new CancellationTokenSource().Token); + bool found = this.currentVertex.DiscoverChannels(true, false, false, manager); if (found) { this.richTextBox_file.SuspendLayout(); @@ -1922,7 
+1864,9 @@ namespace Microsoft.Research.Calypso.DryadAnalysis { this.label_title.Text = "Outputs"; this.Status("Discovering vertex channel information", StatusKind.LongOp); - bool found = this.currentVertex.DiscoverChannels(false, true, false, this.Status, this.UpdateProgress); + // TODO: this should run in the background + CommManager manager = new CommManager(this.Status, this.UpdateProgress, new CancellationTokenSource().Token); + bool found = this.currentVertex.DiscoverChannels(false, true, false, manager); if (found) { this.richTextBox_file.SuspendLayout(); @@ -2319,7 +2263,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis this.Status("Refreshing...", StatusKind.LongOp); this.Job.InvalidateCaches(); this.stageColorMap = null; // force recomputation - this.LoadJobDetails(); + this.RefreshJob(); } /// @@ -2340,8 +2284,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis private void JobBrowser_FormClosing(object sender, FormClosingEventArgs e) { this.refreshTimer.Stop(); - this.pendingWork.Clear(); - + this.queue.Stop(); this.formSettings.WarnedAboutDebugging = this.WarnedAboutDebugging; this.formSettings.WarnedAboutProfiling = this.WarnedAboutProfiling; this.formSettings.Location = this.Location; @@ -2496,107 +2439,37 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// Unused. private void JobBrowser_Shown(object sender, EventArgs e) { - this.LoadJobDetails(); - } - - #region BACKGROUND_WORK - /// - /// Start performing a piece of background work. - /// - /// Work to perform. - private void StartBackgroundWork(BackgroundWorkInfo work) - { - if (this.backgroundWorker.IsBusy) - { - work.AddTo(this.pendingWork); - this.toolStripStatusLabel_backgroundWork.Text = this.pendingWork.Count() + " tasks pending."; - this.Status("Queued task for execution", StatusKind.OK); - return; - } - - backgroundWorker.RunWorkerAsync(work); + this.RefreshJob(); } /// - /// Show the work currently being done. + /// Refresh the job details. 
/// - /// Message to display (work description). - private void ShowCurrentWork(string msg) + private void RefreshJob() { - if (this.InvokeRequired) - this.Invoke(new Action(this.ShowCurrentWork), msg); - else - this.toolStripStatusLabel_currentWork.Text = msg; - } - - /// - /// Perform some background work. - /// - /// Unused. - /// Event describing the work to perform. - private void backgroundWorker_DoWork(object sender, DoWorkEventArgs e) - { - BackgroundWorkInfo work = (BackgroundWorkInfo)e.Argument; - work.workStartTime = DateTime.Now; - this.ShowCurrentWork("Doing " + work + "."); - - switch (work.Work) - { - case BackgroundWorkInfo.WorkKind.LoadJobInfo: - { - work.Success = this.Job.CollectEssentialInformation(this.Status, this.UpdateProgress); - break; - } - } - e.Result = work; - } - - /// - /// Background work has terminated. - /// - /// Unused. - /// Event describing the result. - private void backgroundWorker_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e) - { - this.ShowCurrentWork("Doing nothing."); - if (e.Cancelled) - { - this.Status("Background work was cancelled", StatusKind.OK); - goto end; - } - else if (e.Error != null) - { - this.Status("Exception during background work: " + e.Error.Message, StatusKind.Error); - Trace.TraceInformation(e.ToString()); - goto end; - } - - if (e.Result == null) - { - Trace.TraceInformation("Null result from background work!"); - // I don't know why this happens - goto end; - } - BackgroundWorkInfo result = (BackgroundWorkInfo)e.Result; - if (result.Success) - { - switch (result.Work) + DryadLinqJobInfo job = this.Job; + DateTime start = DateTime.Now; + var item = new BackgroundWorkItem( + m => { - case BackgroundWorkInfo.WorkKind.LoadJobInfo: - this.LoadJobCompleted(DateTime.Now - result.workStartTime); - break; - } - this.Status("Completed " + result, StatusKind.OK); - } - else - { - // failed in background work - goto end; - } + job.CollectEssentialInformation(m); + return DateTime.Now - start; 
+ }, + this.JobInfoLoaded, + "refreshJob"); + this.Queue(item); + } - // do not overwrite the error message if the job did not succeed + /// + /// Called after a job has been loaded. + /// + /// If true the loading has been cancelled. + /// Time to load job. + private void JobInfoLoaded(bool cancelled, TimeSpan loadTime) + { + if (cancelled) return; - // refresh the stage view too + this.LoadJobCompleted(loadTime); string s = this.currentStage != null ? this.currentStage.Name : null; if (this.doingStartup && string.IsNullOrEmpty(s)) { @@ -2613,17 +2486,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis { this.SetTable(this.currentTable.Refresh(this.Job, this.Status, !this.hideCancelledVerticesToolStripMenuItem.Checked)); } - - end: - if (this.pendingWork.Any()) - { - BackgroundWorkInfo work = this.pendingWork[0]; - this.pendingWork.RemoveAt(0); - this.toolStripStatusLabel_backgroundWork.Text = this.pendingWork.Count() + " tasks pending."; - this.StartBackgroundWork(work); - } } - #endregion #region MOUSE_DYNAMIC_VIEWS /// @@ -2879,7 +2742,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis lv.Show(); var item = new BackgroundWorkItem( - (s, p) => ScanJMStdout(this.currentVertex, this.Job.ManagerVertex.StdoutFile, lv), + m => ScanJMStdout(this.currentVertex, this.Job.ManagerVertex.StdoutFile, lv), (c, b) => { }, "findStdout"); this.Queue(item); @@ -2936,7 +2799,8 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// Unused. 
private void diagnoseToolStripMenuItem_Click(object sender, EventArgs e) { - JobFailureDiagnosis diagnosis = JobFailureDiagnosis.CreateJobFailureDiagnosis(this.Job, this.staticPlan, this.Status, this.UpdateProgress); + CommManager manager = new CommManager(this.Status, this.UpdateProgress, new CancellationToken()); + JobFailureDiagnosis diagnosis = JobFailureDiagnosis.CreateJobFailureDiagnosis(this.Job, this.staticPlan, manager); DiagnosisLog log = diagnosis.Diagnose(); this.DisplayDiagnosis(log); } @@ -3328,7 +3192,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis ClusterStatus clusterStatus = this.Job.ClusterConfiguration.CreateClusterStatus(); var item = new BackgroundWorkItem( - (s, p) => ClusterWork.CancelJobs(job, clusterStatus, s, p), + m => ClusterWork.CancelJobs(job, clusterStatus, m), (c, b) => { }, "cancel"); this.Queue(item); @@ -3344,7 +3208,9 @@ namespace Microsoft.Research.Calypso.DryadAnalysis if (this.currentVertex == null) return; - VertexFailureDiagnosis vfd = VertexFailureDiagnosis.CreateVertexFailureDiagnosis(this.Job, this.staticPlan, this.currentVertex, this.Status, this.UpdateProgress); + // TODO: this should run in the background + CommManager manager = new CommManager(this.Status, this.UpdateProgress, new CancellationToken()); + VertexFailureDiagnosis vfd = VertexFailureDiagnosis.CreateVertexFailureDiagnosis(this.Job, this.staticPlan, this.currentVertex, manager); DiagnosisLog log = vfd.Diagnose(); this.DisplayDiagnosis(log); } @@ -3387,16 +3253,6 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// Unused. 
private void dataGridView_Scroll(object sender, ScrollEventArgs e) { - if (e.Type == ScrollEventType.EndScroll) - { - VScrollBar scrollbar = sender as VScrollBar; - if (scrollbar == null) - return; - DataGridView view = scrollbar.Parent as DataGridView; - if (view == null) - return; - view.AutoResizeColumns(DataGridViewAutoSizeColumnsMode.DisplayedCells); - } } /// @@ -3427,9 +3283,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// Unused. private void cacheAllLogsToolStripMenuItem_Click(object sender, EventArgs e) { - BackgroundWorkInfo work = new BackgroundWorkInfo(); - work.Work = BackgroundWorkInfo.WorkKind.LoadJobInfo; - this.StartBackgroundWork(work); + this.RefreshJob(); IClusterResidentObject folder = this.richtextBoxShownFile; if (folder == null || folder.Exception != null || !folder.RepresentsAFolder) @@ -3518,7 +3372,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis List vertices = this.stageData.ToList(); var item = new BackgroundWorkItem( - (s, p) => CacheAllVertices(this.Job.ClusterConfiguration, this.Job.Summary, vertices, s, p), + m => CacheAllVertices(this.Job.ClusterConfiguration, this.Job.Summary, vertices, m), (c, b) => { }, "cacheAll"); this.Queue(item); @@ -3528,23 +3382,27 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// Cache the vertices in the list; executed on the background thread. /// /// True: success. + /// Communication manager. + /// Cluster configuration. + /// Job to cache. + /// Vertices to cache. 
private static bool CacheAllVertices( ClusterConfiguration config, DryadLinqJobSummary summary, List vertices, - StatusReporter status, Action progress) + CommManager manager) { int done = 0; int todo = vertices.Count; int files = 0; - status("Caching data for " + todo + " vertices", StatusKind.LongOp); + manager.Status("Caching data for " + todo + " vertices", StatusKind.LongOp); foreach (ExecutedVertexInstance v in vertices) { files += CacheVertexInfo(config, summary, v); done++; - progress(done / todo); + manager.Progress(done / todo); } - progress(100); - status("Cached " + files + " files", StatusKind.OK); + manager.Progress(100); + manager.Status("Cached " + files + " files", StatusKind.OK); return true; } @@ -3576,6 +3434,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis } ISharedStreamReader reader = file.GetStream(); + // ReSharper disable once UnusedVariable foreach (string line in reader.ReadAllLines()) { // discard; causes caching @@ -3584,6 +3443,11 @@ namespace Microsoft.Research.Calypso.DryadAnalysis } return cached; } + + private void cancelCurrentWorkToolStripMenuItem_Click(object sender, EventArgs e) + { + this.queue.CancelCurrentWork(); + } } /// diff --git a/JobBrowser/JobBrowser/JobBrowser.csproj b/JobBrowser/JobBrowser/JobBrowser.csproj index 32ec137..fe6aed5 100644 --- a/JobBrowser/JobBrowser/JobBrowser.csproj +++ b/JobBrowser/JobBrowser/JobBrowser.csproj @@ -1,5 +1,6 @@ - + + Debug AnyCPU @@ -8,8 +9,8 @@ {EDDD2E0B-A52B-4E25-9436-B874017673FF} Exe Properties - DryadAnalysis - JobBrowser + JobBrowser + DryadLinqBrowser v4.5 512 @@ -22,6 +23,7 @@ 3.5 + publish\ true Disk @@ -36,29 +38,6 @@ 1.0.0.%2a false true - - - - true - full - false - bin\Debug\ - TRACE;DEBUG - prompt - 4 - bin\Debug\JobBrowser.xml - AllRules.ruleset - false - - - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - AllRules.ruleset - false true @@ -79,7 +58,48 @@ prompt AllRules.ruleset + + OnBuildSuccess + + + False + ..\..\..\Program Files (x86)\Microsoft Visual 
Studio 9.0\Common7\IDE\PublicAssemblies\EnvDTE.dll + + + False + ..\..\..\Program Files (x86)\Microsoft Visual Studio 9.0\Common7\IDE\PublicAssemblies\EnvDTE80.dll + + + False + ..\..\..\Program Files (x86)\Microsoft Visual Studio 9.0\Common7\IDE\PublicAssemblies\EnvDTE90.dll + + + False + ..\packages\Microsoft.Data.Edm.5.6.1\lib\net40\Microsoft.Data.Edm.dll + + + False + ..\packages\Microsoft.Data.OData.5.6.1\lib\net40\Microsoft.Data.OData.dll + + + False + ..\packages\Microsoft.Data.Services.Client.5.6.1\lib\net40\Microsoft.Data.Services.Client.dll + + + False + ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.Hadoop.Client.dll + + + False + ..\..\Daphne\Dependences\Microsoft.Hpc.Scheduler.dll + True + + + False + ..\..\Daphne\Dependences\Microsoft.Hpc.Scheduler.Properties.dll + True + False ..\packages\Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1\lib\Microsoft.Msagl.dll @@ -92,13 +112,71 @@ False ..\packages\Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1\lib\Microsoft.Msagl.GraphViewerGdi.dll + + False + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll + + + False + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + + + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.dll + + + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.dll + + + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll + + + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.dll + + + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll + + + ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll + + + ..\packages\Microsoft.WindowsAzure.Management.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.dll + + + False + 
..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll + + + False + ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.dll + + + False + ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll + + + ..\packages\Microsoft.WindowsAzure.Management.Storage.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll + + + False + ..\packages\WindowsAzure.Storage.3.1.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll + 3.5 + + + + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Extensions.dll + + + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Primitives.dll + + + 3.5 @@ -186,11 +264,11 @@ JobObjectModel - {20B91AAF-AAD4-47DF-9F1D-494DE6E066F9} + {20b91aaf-aad4-47df-9f1d-494de6e066f9} Tools - {27635A68-ADFC-4192-9262-B4E6ECDDCE09} + {27635a68-adfc-4192-9262-b4e6ecddce09} UsefulForms @@ -234,6 +312,19 @@ + + + + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
+ + + + + - + \ No newline at end of file diff --git a/JobBrowser/JobBrowser/JobBrowser.resx b/JobBrowser/JobBrowser/JobBrowser.resx index 951e670..0e46710 100644 --- a/JobBrowser/JobBrowser/JobBrowser.resx +++ b/JobBrowser/JobBrowser/JobBrowser.resx @@ -126,9 +126,6 @@ 17, 17 - - 209, 17 - 42 diff --git a/JobBrowser/JobBrowser/LocalDebuggingAndProfiling.cs b/JobBrowser/JobBrowser/LocalDebuggingAndProfiling.cs index 2920dda..f8201ff 100644 --- a/JobBrowser/JobBrowser/LocalDebuggingAndProfiling.cs +++ b/JobBrowser/JobBrowser/LocalDebuggingAndProfiling.cs @@ -24,13 +24,13 @@ using System.Collections.Generic; using System.IO; using System.Linq; using System.Text.RegularExpressions; -using Microsoft.Research.Calypso.JobObjectModel; -using Microsoft.Research.Calypso.Tools; -using Microsoft.Research.Calypso.UsefulForms; +using Microsoft.Research.JobObjectModel; +using Microsoft.Research.Tools; +using Microsoft.Research.UsefulForms; using Microsoft.Win32; using System.Diagnostics; -namespace Microsoft.Research.Calypso.DryadAnalysis +namespace Microsoft.Research.DryadAnalysis { /// /// Class that encapsulates the steps required to debug a vertex locally. diff --git a/JobBrowser/JobBrowser/LogViewer.Designer.cs b/JobBrowser/JobBrowser/LogViewer.Designer.cs index 360b6c0..693c491 100644 --- a/JobBrowser/JobBrowser/LogViewer.Designer.cs +++ b/JobBrowser/JobBrowser/LogViewer.Designer.cs @@ -18,9 +18,9 @@ See the Apache Version 2.0 License for specific language governing permissions a limitations under the License. */ -using Microsoft.Research.Calypso.Tools; +using Microsoft.Research.Tools; -namespace Microsoft.Research.Calypso.DryadAnalysis +namespace Microsoft.Research.DryadAnalysis { partial class LogViewer { diff --git a/JobBrowser/JobBrowser/LogViewer.cs b/JobBrowser/JobBrowser/LogViewer.cs index c1fd03e..9c00e1f 100644 --- a/JobBrowser/JobBrowser/LogViewer.cs +++ b/JobBrowser/JobBrowser/LogViewer.cs @@ -19,15 +19,15 @@ limitations under the License. 
*/ -using Microsoft.Research.Calypso.JobObjectModel; using System; using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Windows.Forms; -using Microsoft.Research.Calypso.Tools; +using Microsoft.Research.JobObjectModel; +using Microsoft.Research.Tools; -namespace Microsoft.Research.Calypso.DryadAnalysis +namespace Microsoft.Research.DryadAnalysis { /// /// A log viewer displays fragments of logs or other text files. @@ -37,7 +37,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis bool canceled; DGVData shownText; - DGVData shownLogLines; + DGVData shownLogLines; StatusWriter status; /// @@ -77,7 +77,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis } else { - this.shownLogLines = new DGVData(); + this.shownLogLines = new DGVData(); this.filteredDataGridView.SetDataSource(this.shownLogLines); foreach (string s in new string[] { "Malformed", "IsError", "OriginalLogLine", "File", "LineNo" }) { @@ -132,7 +132,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis long bytes = 0; List toAddText = new List(); - List toAddLog = new List(); + List toAddLog = new List(); while (!sr.EndOfStream) { string line = sr.ReadLine(); @@ -141,7 +141,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis toAddText.Add(new TextFileLine(lineno, line)); else { - PositionedCosmosLogEntry cle = new PositionedCosmosLogEntry(filename, lineno, line); + PositionedDryadLogEntry cle = new PositionedDryadLogEntry(filename, lineno, line); if (cle.Malformed) { Trace.TraceInformation("Malformed log entry: " + cle.OriginalLogLine); @@ -204,7 +204,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis } else { - PositionedCosmosLogEntry cle = new PositionedCosmosLogEntry(file, lineno, text); + PositionedDryadLogEntry cle = new PositionedDryadLogEntry(file, lineno, text); if (cle.Malformed) return; this.shownLogLines.AddItem(cle); @@ -274,7 +274,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis var rows = 
this.filteredDataGridView.DataGridView.SelectedRows; for (int i = 0; i < rows.Count; i++) { - PositionedCosmosLogEntry entry = ((PositionedCosmosLogEntry)rows[i].DataBoundItem); + PositionedDryadLogEntry entry = ((PositionedDryadLogEntry)rows[i].DataBoundItem); position += entry.File + ":" + entry.LineNo + Environment.NewLine; } MessageBox.Show(position, "File containing log entries"); @@ -319,7 +319,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// /// Cosmos log entry with position information. /// - public class PositionedCosmosLogEntry : CosmosLogEntry + public class PositionedDryadLogEntry : DryadLogEntry { /// /// File containing the log entry. @@ -336,7 +336,7 @@ namespace Microsoft.Research.Calypso.DryadAnalysis /// File containing the log entry. /// Line number. /// Line contents. - public PositionedCosmosLogEntry(string file, long lineno, string line) + public PositionedDryadLogEntry(string file, long lineno, string line) : base(line) { this.File = file; diff --git a/JobBrowser/JobBrowser/Properties/AssemblyInfo.cs b/JobBrowser/JobBrowser/Properties/AssemblyInfo.cs index 628ac4c..74ab41b 100644 --- a/JobBrowser/JobBrowser/Properties/AssemblyInfo.cs +++ b/JobBrowser/JobBrowser/Properties/AssemblyInfo.cs @@ -25,12 +25,12 @@ using System.Runtime.InteropServices; // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. -[assembly: AssemblyTitle("DryadAnalysis")] +[assembly: AssemblyTitle("DryadLinqBrowser")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("Microsoft Corporation")] -[assembly: AssemblyProduct("DryadAnalysis")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. 
All rights reserved.")] +[assembly: AssemblyCompany("MSIT")] +[assembly: AssemblyProduct("DryadLinqBrowser")] +[assembly: AssemblyCopyright("Copyright © Microsoft")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] @@ -52,5 +52,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.1.0.0")] -[assembly: AssemblyFileVersion("0.1.0.0")] +[assembly: AssemblyVersion("1.0.1.0")] +[assembly: AssemblyFileVersion("1.0.1.0")] diff --git a/JobBrowser/JobBrowser/jobschedule.cs b/JobBrowser/JobBrowser/jobschedule.cs index a9dd6e9..69df083 100644 --- a/JobBrowser/JobBrowser/jobschedule.cs +++ b/JobBrowser/JobBrowser/jobschedule.cs @@ -19,13 +19,13 @@ limitations under the License. */ -using Microsoft.Research.Calypso.Tools; using System; using System.Collections.Generic; using System.Drawing; using System.Linq; +using Microsoft.Research.Tools; -namespace Microsoft.Research.Calypso.JobObjectModel +namespace Microsoft.Research.JobObjectModel { /// /// Information about the dynamic execution schedule of a job. 
diff --git a/JobBrowser/JobBrowser/packages.config b/JobBrowser/JobBrowser/packages.config index 6d273c8..420f9ff 100644 --- a/JobBrowser/JobBrowser/packages.config +++ b/JobBrowser/JobBrowser/packages.config @@ -1,4 +1,22 @@ - + + + + + + + + + - + + + + + + + + + + + \ No newline at end of file diff --git a/JobBrowser/Microsoft.Research.JobBrowser.nuspec b/JobBrowser/Microsoft.Research.JobBrowser.nuspec deleted file mode 100644 index 67ad440..0000000 --- a/JobBrowser/Microsoft.Research.JobBrowser.nuspec +++ /dev/null @@ -1,48 +0,0 @@ - - - - Microsoft.Research.Calypso - 0.7.0-beta024 - Calypso Job Browser for DryadLINQ - msrsvc - msrsvc,Microsoft - http://www.apache.org/licenses/LICENSE-2.0 - http://research.microsoft.com/en-us/um/siliconvalley/projects/BigDataDev/ - true - The DryadLINQ job browser is a graphical user interface which provides a unified view of a large part of the distributed state of a DryadLINQ job. In this document we provide a brief overview of the main capabilities of the DryadLINQ job browser. - © Microsoft Corporation. All rights reserved. - en-US - DryadLINQ Dryad Azure - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/JobBrowser/Tools/Graphlayout.cs b/JobBrowser/Tools/Graphlayout.cs index c37821f..ba548d6 100644 --- a/JobBrowser/Tools/Graphlayout.cs +++ b/JobBrowser/Tools/Graphlayout.cs @@ -24,7 +24,7 @@ using System.Collections.Generic; using System.Drawing; using System.Drawing.Drawing2D; -namespace Microsoft.Research.Calypso.Tools +namespace Microsoft.Research.Tools { /// /// Deals with the plane representation of a graph. 
diff --git a/JobBrowser/Tools/Properties/AssemblyInfo.cs b/JobBrowser/Tools/Properties/AssemblyInfo.cs index ca06971..2bcfed4 100644 --- a/JobBrowser/Tools/Properties/AssemblyInfo.cs +++ b/JobBrowser/Tools/Properties/AssemblyInfo.cs @@ -28,9 +28,9 @@ using System.Runtime.InteropServices; [assembly: AssemblyTitle("Tools")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("Microsoft Corporation")] +[assembly: AssemblyCompany("MSIT")] [assembly: AssemblyProduct("Tools")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. All rights reserved.")] +[assembly: AssemblyCopyright("Copyright © MSIT 2008")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] @@ -52,5 +52,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.1.0.0")] -[assembly: AssemblyFileVersion("0.1.0.0")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/JobBrowser/Tools/Tools.csproj b/JobBrowser/Tools/Tools.csproj index 9711bdd..48d27a0 100644 --- a/JobBrowser/Tools/Tools.csproj +++ b/JobBrowser/Tools/Tools.csproj @@ -1,4 +1,4 @@ - + Debug @@ -9,7 +9,7 @@ Library Properties Microsoft.Research.Artemis.Tools - Microsoft.Research.DryadLinq.JobBrowser.Tools + Microsoft.Research.Calypso.Tools v4.5 512 @@ -33,28 +33,6 @@ true - - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - bin\Debug\Microsoft.Research.DryadLinq.JobBrowser.Tools.xml - AllRules.ruleset - false - - - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - AllRules.ruleset - false - true bin\x64\Debug\ @@ -84,6 +62,7 @@ + @@ -117,4 +96,4 @@ --> - + \ No newline at end of file diff --git a/JobBrowser/Tools/drawingSurface.cs b/JobBrowser/Tools/drawingSurface.cs index c6d43c7..e5dfc88 100644 --- a/JobBrowser/Tools/drawingSurface.cs 
+++ b/JobBrowser/Tools/drawingSurface.cs @@ -19,7 +19,7 @@ limitations under the License. */ -namespace Microsoft.Research.Calypso.Tools +namespace Microsoft.Research.Tools { using System; using System.Drawing; diff --git a/JobBrowser/Tools/partitionedTables.cs b/JobBrowser/Tools/partitionedTables.cs index 858b04c..943b38c 100644 --- a/JobBrowser/Tools/partitionedTables.cs +++ b/JobBrowser/Tools/partitionedTables.cs @@ -19,7 +19,7 @@ limitations under the License. */ -namespace Microsoft.Research.Calypso.Tools +namespace Microsoft.Research.Tools { using System.Collections.Generic; using System.IO; diff --git a/JobBrowser/Tools/tools.cs b/JobBrowser/Tools/tools.cs index dc47e3f..3132511 100644 --- a/JobBrowser/Tools/tools.cs +++ b/JobBrowser/Tools/tools.cs @@ -31,6 +31,7 @@ using System.Net; using System.Reflection; using System.Runtime.InteropServices; using System.Text; +using System.Windows.Forms; using System.Xml; using System.Xml.Serialization; using System.Text.RegularExpressions; @@ -38,7 +39,7 @@ using System.Threading; using System.Security.Cryptography; // Implement here generally-useful tools. -namespace Microsoft.Research.Calypso.Tools +namespace Microsoft.Research.Tools { /// /// An error handling function. @@ -66,6 +67,38 @@ namespace Microsoft.Research.Calypso.Tools LongOp, }; + /// + /// Communication management with background activities. + /// + public struct CommManager + { + /// + /// Used to report status. + /// + public StatusReporter Status; + /// + /// Used to report progress. + /// + public Action Progress; + /// + /// Used to cancel activities. + /// + public CancellationToken Token; + + /// + /// Create a communication manager. + /// + /// Status to report errors. + /// Action to report progress. + /// Token to cancel computations. + public CommManager(StatusReporter status, Action progress, CancellationToken token) + { + this.Status = status; + this.Progress = progress; + this.Token = token; + } + } + /// /// Untyped version of work item. 
/// @@ -100,6 +133,10 @@ namespace Microsoft.Research.Calypso.Tools /// Exception that occurred during background work (or null). /// void RunContinuation(Exception ex); + /// + /// Can be used to cancel this work item. + /// + CancellationTokenSource TokenSource { get; } } /// @@ -111,7 +148,7 @@ namespace Microsoft.Research.Calypso.Tools /// /// Computation to invoke. If the computation is not cancelled the result is passed as the second argument to the continuation. /// - public Func, T> Computation { get; protected set; } + public Func Computation { get; protected set; } /// /// Function to call when the work is completed. The first argument is 'true' if the computation was not cancelled. The second argument is the result of the computation. @@ -141,6 +178,10 @@ namespace Microsoft.Research.Calypso.Tools /// Queue containing item. /// private BackgroundWorkQueue queue; + /// + /// Source for cancellation token. + /// + public CancellationTokenSource TokenSource { get; protected set; } // ReSharper disable ConvertToConstant.Local bool TraceAsync = @@ -159,7 +200,7 @@ namespace Microsoft.Research.Calypso.Tools /// Computation to perform on a background thread. Ideally this should always be a static method. /// Continuation to invoke on the foreground thread when work is done. /// Description of the background work. - public BackgroundWorkItem(Func, T> computation, Action continuation, string description) + public BackgroundWorkItem(Func computation, Action continuation, string description) { this.Description = description; this.Computation = computation; @@ -167,23 +208,22 @@ namespace Microsoft.Research.Calypso.Tools this.reporter = null; this.queue = null; this.Id = crtid++; + this.TokenSource = new CancellationTokenSource(); } /// /// Perform the background work. /// - /// Worker which does the work. - /// Delegate used to report errors. + /// Worker which does the work. + /// Delegate used to report errors. /// Delegate used to report progress. 
/// If true for an item, cancel it. - // ReSharper disable ParameterHidesMember - public void Queue(BackgroundWorkQueue queue, StatusReporter reporter, Action progressReporter, Func cancel) - // ReSharper restore ParameterHidesMember + public void Queue(BackgroundWorkQueue q, StatusReporter rep, Action progressReporter, Func cancel) { if (TraceAsync) Console.WriteLine("{0} Queueing {1}", Utilities.PreciseTime, this.Description); - this.queue = queue; - this.reporter = reporter; + this.queue = q; + this.reporter = rep; this.progress = progressReporter; this.queue.CancelMatching(cancel); this.queue.Enqueue(this); @@ -200,7 +240,8 @@ namespace Microsoft.Research.Calypso.Tools Console.WriteLine("{0} Running function {1}", Utilities.PreciseTime, this.Description); try { - this.Result = this.Computation(this.reporter, this.progress); + CommManager manager = new CommManager(this.reporter, this.progress, this.TokenSource.Token); + this.Result = this.Computation(manager); } catch (Exception ex) { @@ -235,6 +276,7 @@ namespace Microsoft.Research.Calypso.Tools if (TraceAsync) Console.WriteLine("{1}/{0}: Cancelling", this.Description, this.Id); this.Cancelled = true; + this.TokenSource.Cancel(); this.queue.CancelMe(this); } @@ -266,12 +308,18 @@ namespace Microsoft.Research.Calypso.Tools /// IBackgroundWorkItem current; + private ToolStripStatusLabel currentItemLabel, queueSizeLabel; + /// /// Create a background work queue servicing a specified worker. /// /// Worker to use. - public BackgroundWorkQueue(BackgroundWorker worker) + /// Label where the current work is displayed. + /// Label where the queue size is displayed. 
+ public BackgroundWorkQueue(BackgroundWorker worker, ToolStripStatusLabel current, ToolStripStatusLabel queue) { + this.currentItemLabel = current; + this.queueSizeLabel = queue; if (worker == null) throw new ArgumentNullException("worker"); this.BackgroundWorker = worker; @@ -280,6 +328,7 @@ namespace Microsoft.Research.Calypso.Tools this.BackgroundWorker.DoWork += this.worker_DoWork; this.queue = new List(); this.current = null; + this.stopped = false; } /// @@ -289,7 +338,7 @@ namespace Microsoft.Research.Calypso.Tools /// Unused. void worker_DoWork(object sender, DoWorkEventArgs e) { - if (this.current == null) + if (this.stopped || this.current == null) return; #if DEBUG_WORKQUEUE #endif @@ -306,6 +355,8 @@ namespace Microsoft.Research.Calypso.Tools e.Cancel = true; } + private bool stopped; + /// /// Called when the worker is completed. /// @@ -319,6 +370,8 @@ namespace Microsoft.Research.Calypso.Tools #endif IBackgroundWorkItem crt = this.current; this.current = null; + if (this.currentItemLabel != null) + this.currentItemLabel.Text = ""; crt.RunContinuation(e.Error); } this.Kick(); @@ -348,7 +401,11 @@ namespace Microsoft.Research.Calypso.Tools if (this.current != null) throw new Exception("current is not null"); this.current = this.queue[0]; + if (this.currentItemLabel != null) + this.currentItemLabel.Text = "Doing " + this.current.Description; this.queue.RemoveAt(0); + if (this.queueSizeLabel != null) + this.queueSizeLabel.Text = "Pending " + this.queue.Count + " items"; this.Start(); } @@ -411,9 +468,22 @@ namespace Microsoft.Research.Calypso.Tools } } + /// + /// Stop the queue. + /// public void Stop() { - // TODO + this.stopped = true; + this.CancelCurrentWork(); + } + + /// + /// Cancel the currently running work. + /// + public void CancelCurrentWork() + { + if (this.current == null) return; + this.current.Cancel(); } } @@ -3409,7 +3479,8 @@ namespace Microsoft.Research.Calypso.Tools /// Read the stream to the end from the current position. 
/// /// The contents of the stream. - string ReadToEnd(); + /// Can be used to cancel the reading. + string ReadToEnd(CancellationToken token); /// /// Read all the lines remaining in the stream. @@ -3465,11 +3536,15 @@ namespace Microsoft.Research.Calypso.Tools /// Read the whole stream to the end. /// /// A string containing the whole contents of the stream. - public virtual string ReadToEnd() + /// Can be used to cancel the reading. + public virtual string ReadToEnd(CancellationToken token) { StringBuilder result = new StringBuilder(); foreach (string s in this.ReadAllLines()) + { + token.ThrowIfCancellationRequested(); result.AppendLine(s); + } return result.ToString(); } @@ -3612,11 +3687,13 @@ namespace Microsoft.Research.Calypso.Tools /// Read the stream to the end from the current position. /// /// The contents of the stream. - public override string ReadToEnd() + /// Can be used to cancel the reading. + public override string ReadToEnd(CancellationToken token) { string result = this.actualReader.ReadToEnd(); if (this.cacheWriter != null) { + token.ThrowIfCancellationRequested(); this.cacheWriter.Write(result); this.cacheWriter.Close(); if (this.onClose != null) diff --git a/JobBrowser/UsefulForms/FilteredDataGridView.Designer.cs b/JobBrowser/UsefulForms/FilteredDataGridView.Designer.cs index 8a716ac..f28be25 100644 --- a/JobBrowser/UsefulForms/FilteredDataGridView.Designer.cs +++ b/JobBrowser/UsefulForms/FilteredDataGridView.Designer.cs @@ -18,7 +18,7 @@ See the Apache Version 2.0 License for specific language governing permissions a limitations under the License. */ -namespace Microsoft.Research.Calypso.Tools +namespace Microsoft.Research.Tools { partial class FilteredDataGridView { @@ -33,7 +33,7 @@ namespace Microsoft.Research.Calypso.Tools /// true if managed resources should be disposed; otherwise, false. 
protected override void Dispose(bool disposing) { - if (disposing && (components != null)) + if (disposing && (components != null)) { components.Dispose(); } diff --git a/JobBrowser/UsefulForms/FilteredDataGridView.cs b/JobBrowser/UsefulForms/FilteredDataGridView.cs index 7ad7d1c..37a60bf 100644 --- a/JobBrowser/UsefulForms/FilteredDataGridView.cs +++ b/JobBrowser/UsefulForms/FilteredDataGridView.cs @@ -23,7 +23,7 @@ using System; using System.Collections.Generic; using System.Windows.Forms; -namespace Microsoft.Research.Calypso.Tools +namespace Microsoft.Research.Tools { /// /// A data grid that filters the contents. diff --git a/JobBrowser/UsefulForms/PasswordDialog.Designer.cs b/JobBrowser/UsefulForms/PasswordDialog.Designer.cs index 0c3b543..1e67ce9 100644 --- a/JobBrowser/UsefulForms/PasswordDialog.Designer.cs +++ b/JobBrowser/UsefulForms/PasswordDialog.Designer.cs @@ -18,7 +18,7 @@ See the Apache Version 2.0 License for specific language governing permissions a limitations under the License. */ -namespace Microsoft.Research.Calypso.UsefulForms +namespace Microsoft.Research.UsefulForms { partial class PasswordDialog { diff --git a/JobBrowser/UsefulForms/PasswordDialog.cs b/JobBrowser/UsefulForms/PasswordDialog.cs index 694724b..a8d6e86 100644 --- a/JobBrowser/UsefulForms/PasswordDialog.cs +++ b/JobBrowser/UsefulForms/PasswordDialog.cs @@ -21,7 +21,7 @@ limitations under the License. using System.Windows.Forms; -namespace Microsoft.Research.Calypso.UsefulForms +namespace Microsoft.Research.UsefulForms { /// /// Prompt user for password. 
diff --git a/JobBrowser/UsefulForms/Properties/AssemblyInfo.cs b/JobBrowser/UsefulForms/Properties/AssemblyInfo.cs index 74404d8..334a73e 100644 --- a/JobBrowser/UsefulForms/Properties/AssemblyInfo.cs +++ b/JobBrowser/UsefulForms/Properties/AssemblyInfo.cs @@ -28,9 +28,9 @@ using System.Runtime.InteropServices; [assembly: AssemblyTitle("UsefulForms")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("Microsoft Corporation")] +[assembly: AssemblyCompany("Microsoft")] [assembly: AssemblyProduct("UsefulForms")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. All rights reserved.")] +[assembly: AssemblyCopyright("Copyright © Microsoft 2010")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] @@ -52,5 +52,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.1.0.0")] -[assembly: AssemblyFileVersion("0.1.0.0")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/JobBrowser/UsefulForms/Status.cs b/JobBrowser/UsefulForms/Status.cs index aefdb21..2684eef 100644 --- a/JobBrowser/UsefulForms/Status.cs +++ b/JobBrowser/UsefulForms/Status.cs @@ -23,7 +23,7 @@ using System.Drawing; using System; using System.Windows.Forms; -namespace Microsoft.Research.Calypso.Tools +namespace Microsoft.Research.Tools { /// /// Delegate used for invoking status messages from accross threads. diff --git a/JobBrowser/UsefulForms/UrlDialog.Designer.cs b/JobBrowser/UsefulForms/UrlDialog.Designer.cs index 18b5ddd..ff53ec3 100644 --- a/JobBrowser/UsefulForms/UrlDialog.Designer.cs +++ b/JobBrowser/UsefulForms/UrlDialog.Designer.cs @@ -18,7 +18,7 @@ See the Apache Version 2.0 License for specific language governing permissions a limitations under the License. 
*/ -namespace Microsoft.Research.Calypso.UsefulForms +namespace Microsoft.Research.UsefulForms { partial class CustomDialog { diff --git a/JobBrowser/UsefulForms/UrlDialog.cs b/JobBrowser/UsefulForms/UrlDialog.cs index 7bf385f..39866e2 100644 --- a/JobBrowser/UsefulForms/UrlDialog.cs +++ b/JobBrowser/UsefulForms/UrlDialog.cs @@ -19,7 +19,7 @@ limitations under the License. */ -namespace Microsoft.Research.Calypso.UsefulForms +namespace Microsoft.Research.UsefulForms { using System.Windows.Forms; diff --git a/JobBrowser/UsefulForms/UsefulForms.csproj b/JobBrowser/UsefulForms/UsefulForms.csproj index d7cea04..4b7384e 100644 --- a/JobBrowser/UsefulForms/UsefulForms.csproj +++ b/JobBrowser/UsefulForms/UsefulForms.csproj @@ -1,4 +1,4 @@ - + Debug @@ -9,7 +9,7 @@ Library Properties UsefulForms - Microsoft.Research.DryadLinq.JobBrowser.UsefulForms + Microsoft.Research.Calypso.UsefulForms v4.5 512 @@ -33,27 +33,6 @@ true - - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - AllRules.ruleset - false - - - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - AllRules.ruleset - false - true bin\x64\Debug\ @@ -72,6 +51,9 @@ prompt AllRules.ruleset + + OnBuildSuccess + @@ -152,4 +134,4 @@ --> - + \ No newline at end of file diff --git a/JobBrowser/UsefulForms/WarningBox.Designer.cs b/JobBrowser/UsefulForms/WarningBox.Designer.cs index 23f9ad7..35a0a78 100644 --- a/JobBrowser/UsefulForms/WarningBox.Designer.cs +++ b/JobBrowser/UsefulForms/WarningBox.Designer.cs @@ -18,7 +18,7 @@ See the Apache Version 2.0 License for specific language governing permissions a limitations under the License. */ -namespace Microsoft.Research.Calypso.UsefulForms +namespace Microsoft.Research.UsefulForms { partial class WarningBox { diff --git a/JobBrowser/UsefulForms/WarningBox.cs b/JobBrowser/UsefulForms/WarningBox.cs index e686741..0c01b52 100644 --- a/JobBrowser/UsefulForms/WarningBox.cs +++ b/JobBrowser/UsefulForms/WarningBox.cs @@ -22,7 +22,7 @@ limitations under the License. 
using System; using System.Windows.Forms; -namespace Microsoft.Research.Calypso.UsefulForms +namespace Microsoft.Research.UsefulForms { /// /// A box displaying a warning, which can be turned off. diff --git a/JobBrowser/doc/clusterBrowser.png b/JobBrowser/doc/clusterBrowser.png new file mode 100644 index 0000000..c113ff4 Binary files /dev/null and b/JobBrowser/doc/clusterBrowser.png differ diff --git a/JobBrowser/doc/clusterEditor.png b/JobBrowser/doc/clusterEditor.png new file mode 100644 index 0000000..055a40b Binary files /dev/null and b/JobBrowser/doc/clusterEditor.png differ diff --git a/JobBrowser/doc/clusterManipulation.png b/JobBrowser/doc/clusterManipulation.png new file mode 100644 index 0000000..19a4545 Binary files /dev/null and b/JobBrowser/doc/clusterManipulation.png differ diff --git a/JobBrowser/doc/clusterMenu.png b/JobBrowser/doc/clusterMenu.png new file mode 100644 index 0000000..e99b1c6 Binary files /dev/null and b/JobBrowser/doc/clusterMenu.png differ diff --git a/JobBrowser/doc/dynamicPlan.png b/JobBrowser/doc/dynamicPlan.png new file mode 100644 index 0000000..b855ddd Binary files /dev/null and b/JobBrowser/doc/dynamicPlan.png differ diff --git a/JobBrowser/doc/filteringVertex.png b/JobBrowser/doc/filteringVertex.png new file mode 100644 index 0000000..7b21cd6 Binary files /dev/null and b/JobBrowser/doc/filteringVertex.png differ diff --git a/JobBrowser/doc/jobBrowser.png b/JobBrowser/doc/jobBrowser.png new file mode 100644 index 0000000..bc09945 Binary files /dev/null and b/JobBrowser/doc/jobBrowser.png differ diff --git a/JobBrowser/doc/jobBrowserManual.docx b/JobBrowser/doc/jobBrowserManual.docx index 8aa752c..e4e52e0 100644 Binary files a/JobBrowser/doc/jobBrowserManual.docx and b/JobBrowser/doc/jobBrowserManual.docx differ diff --git a/JobBrowser/doc/jobFiltering.png b/JobBrowser/doc/jobFiltering.png new file mode 100644 index 0000000..63fc25d Binary files /dev/null and b/JobBrowser/doc/jobFiltering.png differ diff --git 
a/JobBrowser/doc/jobMenu.png b/JobBrowser/doc/jobMenu.png new file mode 100644 index 0000000..c502de7 Binary files /dev/null and b/JobBrowser/doc/jobMenu.png differ diff --git a/JobBrowser/doc/jobPlan.png b/JobBrowser/doc/jobPlan.png new file mode 100644 index 0000000..483bde1 Binary files /dev/null and b/JobBrowser/doc/jobPlan.png differ diff --git a/JobBrowser/doc/jobSchedule.png b/JobBrowser/doc/jobSchedule.png new file mode 100644 index 0000000..3797544 Binary files /dev/null and b/JobBrowser/doc/jobSchedule.png differ diff --git a/JobBrowser/doc/jobSummary.png b/JobBrowser/doc/jobSummary.png new file mode 100644 index 0000000..0e2a9c9 Binary files /dev/null and b/JobBrowser/doc/jobSummary.png differ diff --git a/JobBrowser/doc/stageCode.png b/JobBrowser/doc/stageCode.png new file mode 100644 index 0000000..689e1db Binary files /dev/null and b/JobBrowser/doc/stageCode.png differ diff --git a/JobBrowser/doc/stageFiltering.png b/JobBrowser/doc/stageFiltering.png new file mode 100644 index 0000000..66b4030 Binary files /dev/null and b/JobBrowser/doc/stageFiltering.png differ diff --git a/JobBrowser/doc/stageStatistics.png b/JobBrowser/doc/stageStatistics.png new file mode 100644 index 0000000..4edd223 Binary files /dev/null and b/JobBrowser/doc/stageStatistics.png differ diff --git a/JobBrowser/doc/stageVertices.png b/JobBrowser/doc/stageVertices.png new file mode 100644 index 0000000..01bd960 Binary files /dev/null and b/JobBrowser/doc/stageVertices.png differ diff --git a/JobBrowser/doc/staticPlan.png b/JobBrowser/doc/staticPlan.png new file mode 100644 index 0000000..7eedeec Binary files /dev/null and b/JobBrowser/doc/staticPlan.png differ diff --git a/JobBrowser/doc/vertexState.png b/JobBrowser/doc/vertexState.png new file mode 100644 index 0000000..dda7b54 Binary files /dev/null and b/JobBrowser/doc/vertexState.png differ diff --git a/JobBrowser/doc/vertexStatistics.png b/JobBrowser/doc/vertexStatistics.png new file mode 100644 index 0000000..eac39da Binary 
files /dev/null and b/JobBrowser/doc/vertexStatistics.png differ diff --git a/JobBrowser/doc/viewMenu.png b/JobBrowser/doc/viewMenu.png new file mode 100644 index 0000000..d2a68ab Binary files /dev/null and b/JobBrowser/doc/viewMenu.png differ diff --git a/JobBrowser/packages/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1.nuspec b/JobBrowser/packages/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1.nuspec deleted file mode 100644 index 1f349a2..0000000 --- a/JobBrowser/packages/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1.nuspec +++ /dev/null @@ -1,27 +0,0 @@ - - - - Microsoft.Research.DryadLINQ.MSAGL - 3.0.0.1 - MSAGL Binary Package solely for use with DryadLINQ - msrsvc - msrsvc - http://research.microsoft.com/en-us/um/siliconvalley/projects/BigDataDev/MSAGL_License.txt - http://research.microsoft.com/en-us/projects/msagl/ - true - This is a binary package of the Microsoft Automatic Graph Layout engine solely for use with DryadLINQ. Please see the linked license for details. - © Microsoft Corporation. All rights reserved. 
- en-US - - - - - - - - - - - - - \ No newline at end of file diff --git a/JobBrowser/packages/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1/lib/Microsoft.Msagl.Drawing.dll b/JobBrowser/packages/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1/lib/Microsoft.Msagl.Drawing.dll deleted file mode 100644 index 89a7543..0000000 Binary files a/JobBrowser/packages/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1/lib/Microsoft.Msagl.Drawing.dll and /dev/null differ diff --git a/JobBrowser/packages/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1/lib/Microsoft.Msagl.GraphViewerGdi.dll b/JobBrowser/packages/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1/lib/Microsoft.Msagl.GraphViewerGdi.dll deleted file mode 100644 index 4a029f0..0000000 Binary files a/JobBrowser/packages/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1/lib/Microsoft.Msagl.GraphViewerGdi.dll and /dev/null differ diff --git a/JobBrowser/packages/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1/lib/Microsoft.Msagl.dll b/JobBrowser/packages/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1/lib/Microsoft.Msagl.dll deleted file mode 100644 index 84d6929..0000000 Binary files a/JobBrowser/packages/Microsoft.Research.DryadLINQ.MSAGL.3.0.0.1/lib/Microsoft.Msagl.dll and /dev/null differ diff --git a/LinqToDryad/Attributes.cs b/LinqToDryad/Attributes.cs index d5ea256..37b0fc5 100644 --- a/LinqToDryad/Attributes.cs +++ b/LinqToDryad/Attributes.cs @@ -29,16 +29,27 @@ using System.Diagnostics; namespace Microsoft.Research.DryadLinq { + /// + /// The Nullable attribute specifies if a field is nullable. The information is used by DryadLINQ + /// serialization. DryadLINQ serialization by default treats all fields not nullable. + /// [AttributeUsage(AttributeTargets.Field|AttributeTargets.Property|AttributeTargets.Class|AttributeTargets.Method, AllowMultiple = false)] public sealed class NullableAttribute : Attribute { private bool m_canBeNull; + /// + /// Initializes an instance of NullableAttribute. 
+ /// + /// true iff the target of the attribute is nullable public NullableAttribute(bool canBeNull) { this.m_canBeNull = canBeNull; } + /// + /// Determines if the target of this attribute is nullable. + /// public bool CanBeNull { get { return this.m_canBeNull; } @@ -46,7 +57,7 @@ namespace Microsoft.Research.DryadLinq } [AttributeUsage(AttributeTargets.Method|AttributeTargets.Constructor, AllowMultiple = true)] - public sealed class FieldMappingAttribute : Attribute + internal sealed class FieldMappingAttribute : Attribute { private string m_source; private string m_destination; @@ -116,11 +127,11 @@ namespace Microsoft.Research.DryadLinq } /// - /// The Resource attribute is used to specify the computation cost of a function. - /// IsStateful asserts that the function is stateful; IsExpensive asserts that - /// the function is expensive to compute. The information is useful in generating - /// better execution plan. For example, expensive associative aggregation - /// functions can use multiple aggregation layers. + /// The Resource attribute is used to specify the computation cost of a user defined + /// function (UDF). IsStateful asserts that the function is stateful; IsExpensive + /// asserts that the function is expensive to compute. The information is useful in + /// generating a better execution plan. For example, expensive associative aggregation + /// functions enable the use of multiple aggregation layers. /// [AttributeUsage(AttributeTargets.Method, AllowMultiple = false)] public sealed class ResourceAttribute : Attribute @@ -128,18 +139,28 @@ namespace Microsoft.Research.DryadLinq private bool m_isStateful; private bool m_isExpensive; + /// + /// Initializes an instance of the Resource attribute. The default value of + /// IsStateful is true; the default value of IsExpensive is false. + /// public ResourceAttribute() { this.m_isStateful = true; this.m_isExpensive = false; } + /// + /// Gets and sets the IsStateful flag. 
+ /// public bool IsStateful { get { return this.m_isStateful; } set { this.m_isStateful = value; } } + /// + /// Gets and sets the IsExpensive flag. + /// public bool IsExpensive { get { return this.m_isExpensive; } @@ -147,33 +168,52 @@ namespace Microsoft.Research.DryadLinq } } + /// + /// Indicates that a method can be decomposed to multiple methods. The argument to the + /// constructor must be of type IDecomposable. The computation of the method annotated + /// by this attribute can be decomposed to a sequence of calls to the Seed, Accumulate, + /// RecursiveAccumulate methods and a FinalReduce. + /// + /// + /// If a method is decomposable, a user can annotate it with this attribute. This enables + /// DryadLINQ to perform a generalized "combiner" optimization. + /// [AttributeUsage(AttributeTargets.Method, AllowMultiple = false)] public sealed class DecomposableAttribute : Attribute { - Type m_decompositionType; + private Type m_decompositionType; + /// + /// Initializes an instance of DecomposableAttribute. The argument is a type that implements + /// . + /// + /// A type that implements IDecomposable{TSource, TAccumulate, TResult} public DecomposableAttribute(Type decompositionType) { m_decompositionType = decompositionType; } + /// + /// A type that implements IDecomposable{TSource, TAccumulate, TResult} where + /// TSource is the element type of the input, TAccumulate is the element type + /// of an intermediate dataset, and TResult is the output type of the method + /// annotated by this attribute. + /// public Type DecompositionType { - get { - return m_decompositionType; - } + get { return m_decompositionType; } } } /// - /// Indicates that a method can be used as an associative aggregation method. - /// The aggregation can either be via recursive calls to the tagged method, or - /// via top-level calls to the tagged method, followed by recursive calls to - /// a RecursiveAccumulate method. 
+ /// Indicates that a method is an associative aggregation method. The argument to the + /// constructor must be of type IAssociative. The computation of the method annotated + /// by this attribute can be decomposed to a sequence of calls to the Seed and + /// RecursiveAccumulate methods. /// /// - /// If a recursive accumulator method is necessary, create type that implements - /// IAssociative and provide that to the ctor of this type. + /// If a method is associative, a user can annotate it with this attribute. This enables + /// DryadLINQ to perform the "combiner" optimization. /// [AttributeUsage(AttributeTargets.Method, AllowMultiple = false)] public sealed class AssociativeAttribute : Attribute @@ -181,29 +221,22 @@ namespace Microsoft.Research.DryadLinq private Type m_associativeType; /// - /// Creates an instance of AssociativeAttribute - /// - public AssociativeAttribute() - { - } - - /// - /// Creates an instance of AssociativeAttribute, with an associated type that provides - /// a recursive-accumulator method. + /// Initializes an instance of AssociativeAttribute. The argument is a type that implements + /// . /// /// /// During aggregation, the recursiveAccumulator will be used to aggregate items arising /// from the main aggregation. /// - /// A type that implements IAssociative{T,T} where T - /// is the output type of methods that are decorated with this attribute. + /// A type that implements IAssociative{T}, where T + /// is the output type of the method annotated by this attribute. public AssociativeAttribute(Type associativeType) { this.m_associativeType = associativeType; } /// - /// Type that implements IAssociative{T,T} where T is the output type of methods + /// A type that implements IAssociative{T} where T is the output type of methods /// that are decorated with this attribute. 
/// public Type AssociativeType @@ -212,12 +245,20 @@ namespace Microsoft.Research.DryadLinq } } + /// + /// Provides a user-defined serialization method for a .NET type. + /// [AttributeUsage(AttributeTargets.Class | AttributeTargets.Struct, AllowMultiple = false, Inherited=false)] public sealed class CustomDryadLinqSerializerAttribute : Attribute { + /// + /// Initializes an instance of CustomDryadLinqSerializer attribute. + /// + /// A type that implements IDryadLinqSerializer{T}, where T + /// is the .NET type to be serialized. public CustomDryadLinqSerializerAttribute(Type serializerType) { - SerializerType = serializerType; + this.SerializerType = serializerType; // We need to make sure serializerType implements IDryadLinqSerializer // However we will defer that check until DryadCodeGen.FindCustomSerializerType(), because @@ -225,6 +266,9 @@ namespace Microsoft.Research.DryadLinq // 2) because an exception coming from the attribute ctor leads to an obscure failure. } + /// + /// Gets and sets the type object for serialization. + /// public Type SerializerType { get; private set; } } diff --git a/LinqToDryad/BitVector.cs b/LinqToDryad/BitVector.cs index 91e81b6..9b95f16 100644 --- a/LinqToDryad/BitVector.cs +++ b/LinqToDryad/BitVector.cs @@ -23,15 +23,26 @@ using Microsoft.Research.DryadLinq; namespace Microsoft.Research.DryadLinq.Internal { + /// + /// A simple BitVector implementation. + /// public struct BitVector { private byte[] m_array; + /// + /// Initializes a new instance of the BitVector class. + /// + /// The number of bits for the bit vector. public BitVector(int length) { this.m_array = new byte[(length + 7) / 8]; } - + + /// + /// Initializes a new instance of the BitVector class from an array of boolean values. + /// + /// An array of boolean values representing a bit vector. 
public BitVector(bool[] values) { this.m_array = new byte[(values.Length + 7) / 8]; @@ -49,11 +60,21 @@ namespace Microsoft.Research.DryadLinq.Internal this.m_array = values; } + /// + /// Gets the bit at the specified index. + /// + /// An index into the bit vector. + /// The value of the bit as a boolean. public bool this[int index] { get { return this.Get(index); } } + /// + /// Gets the bit at the specified index. + /// + /// An index into the bit vector. + /// The value of the bit as a boolean. public bool Get(int index) { int idx = index / 8; @@ -61,11 +82,19 @@ namespace Microsoft.Research.DryadLinq.Internal (this.m_array[idx] & (1 << (index % 8))) != 0); } + /// + /// Sets the bit at the specified index. + /// + /// An index into the bit vector. public void Set(int index) { - m_array[index / 8] |= (byte)(1 << (index % 8)); + this.m_array[index / 8] |= (byte)(1 << (index % 8)); } - + + /// + /// Sets all the bits to the specified value. + /// + /// The value to be set for all bits. public void SetAll(bool value) { byte fillValue = 0; @@ -91,6 +120,11 @@ namespace Microsoft.Research.DryadLinq.Internal } } + /// + /// Reads a BitVector from the specified DryadLinqBinaryReader. + /// + /// The DryadLinqBinaryReader to read from. + /// A BitVector public static BitVector Read(DryadLinqBinaryReader reader) { Int32 len = reader.ReadCompactInt32(); @@ -102,6 +136,11 @@ namespace Microsoft.Research.DryadLinq.Internal return new BitVector(values); } + /// + /// Writes a BitVector to the specified DryadLinqBinaryWriter. + /// + /// The DryadLinqBinaryWriter to write to. 
+ /// The BitVector to write public static void Write(DryadLinqBinaryWriter writer, BitVector bv) { bv.WriteInner(writer); diff --git a/LinqToDryad/CodeGenHelper.cs b/LinqToDryad/CodeGenHelper.cs index 1f01673..3b60f89 100644 --- a/LinqToDryad/CodeGenHelper.cs +++ b/LinqToDryad/CodeGenHelper.cs @@ -27,10 +27,10 @@ using System.Linq.Expressions; using System.Diagnostics; using Microsoft.Research.DryadLinq; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { - //these are involved in generated code for member lookup - //eg for direct access to fields of IndexedValue. and probably other situations too. public delegate S GetObjFieldDelegate(T obj); public delegate void SetObjFieldDelegate(T obj, S value); diff --git a/LinqToDryad/Constants.cs b/LinqToDryad/Constants.cs index 995a769..0464af6 100644 --- a/LinqToDryad/Constants.cs +++ b/LinqToDryad/Constants.cs @@ -22,6 +22,14 @@ using System; namespace Microsoft.Research.DryadLinq { + /// + /// The root namespace for DryadLinq client programs + /// + [System.Runtime.CompilerServices.CompilerGeneratedAttribute()] + class NamespaceDoc + { + } + /// /// Constants used by Dryad and DryadLINQ /// @@ -94,25 +102,6 @@ namespace Microsoft.Research.DryadLinq public const uint DrError_ProcessingInterrupted = 0x830A001A; public const uint DrError_VertexHostLostCommunication = 0x830A0FFF; - // NodeAdmin constants - // Retain time set to one day - // todo: this should be configurable - public static readonly TimeSpan RetainTime = new TimeSpan(1, 0, 0, 0); - public static readonly TimeSpan FileTimeStampMarginForGC = new TimeSpan(0, 0, 5, 0); - public const string runningJobEnvVar = "CCP_RUNNING_JOBS"; - public const string replicaPathFormat = @"\\{0}\DscData\{1}.data"; - public const string nodeAdminMutexName = "A19A8AC1-4129-46e2-BB81-ED7EE3265B05"; - public const string nodeAdminUsage = "Syntax:\n\t" + - "DscNodeAdmin [/r] [/g] [/wd] [/e] [/v] [/u]\n\n" + - "Parameters:\n\t" + - "/? 
\t- Display this help message.\n\t" + - "/g \t- Delete files not managed by DSC from the DscData share.\n\t" + - "/wd\t- Delete old job working directories from the DscTemp share.\n\t" + - "/r \t- Replicate DSC files onto this node.\n\t" + - "/e \t- Print full error traces.\n\t" + - "/u \t- Resets DscReplication account password.\n\t" + - "/v \t- Print verbose activity traces.\n"; - // Client retry period is 1 second for first retry, increasing up to 12 seconds for a total of 30 seconds // These timeouts are intended to ride through transient network failures internal const int StartRetryPeriod = 1000; diff --git a/LinqToDryad/DataProvider.cs b/LinqToDryad/DataProvider.cs index 1d932a6..93128fe 100644 --- a/LinqToDryad/DataProvider.cs +++ b/LinqToDryad/DataProvider.cs @@ -39,7 +39,26 @@ using Microsoft.Research.Peloponnese.Storage; namespace Microsoft.Research.DryadLinq { - // DataProvider is an abstraction for different data backends. + /// + /// Specifies whether to compress the output of a DryadLINQ vertex. + /// + public enum CompressionScheme + { + /// + /// No compression. + /// + None, + + /// + /// Compression using gzip. + /// + Gzip + } + + /// + /// DataProvider provides an abstraction for different data backends. New data storage backends + /// could be added by subclassing this class. + /// public abstract class DataProvider { private static Dictionary s_providers; @@ -54,36 +73,84 @@ namespace Microsoft.Research.DryadLinq } /// - /// The prefix of this data provider. + /// The scheme of this data provider. /// public abstract string Scheme { get; } + /// + /// Gets the metadata of a specified dataset. + /// + /// A DryadLinqContext object. + /// The URI of the dataset. + /// The metadata. Returns null if metadata is not present. public abstract DryadLinqMetaData GetMetaData(DryadLinqContext context, Uri dataSetUri); + + /// + /// Gets information of a specified dataset. + /// + /// A DryadLinqContext object. + /// The URI of the dataset. 
+ /// Information about a dataset. public abstract DryadLinqStreamInfo GetStreamInfo(DryadLinqContext context, Uri dataSetUri); - public abstract Uri GetTempDirectory(DryadLinqContext context); - private class DummyHiddenType { } + /// + /// Gets a URI to store a temporary dataset. + /// + /// A DryadLinqContext object. + /// A local path. + /// The URI of a temporary directory. + public abstract Uri GetTemporaryStreamUri(DryadLinqContext context, string path); - public Uri RewriteUri(DryadLinqContext context, Uri dataSetUri) - { - return RewriteUri(context, dataSetUri); - } - - public virtual Uri RewriteUri(DryadLinqContext context, Uri dataSetUri) + /// + /// Rewrites the URI of a dataset. Allows DataProvider specific rewriting. + /// + /// The element type of the specified dataset + /// The current DryadLinqContext. + /// The URI of the dataset. + /// The intended access to the dataset. + /// The rewritten URI of the dataset. + public virtual Uri RewriteUri(DryadLinqContext context, + Uri dataSetUri, + FileAccess access = FileAccess.Read) { return dataSetUri; } + /// + /// Ingress a .NET collection to a specified store location. + /// + /// The record type of the collection. + /// An instance of DryadLinqContext. + /// The collection to be ingressed. + /// The URI to store the collection. + /// The metadata for the collection. + /// The compression scheme used to store the collection. + /// true to only store the collection temporarily with a time lease. public abstract void Ingress(DryadLinqContext context, IEnumerable source, - Uri dataSetName, + Uri dataSetUri, DryadLinqMetaData metaData, CompressionScheme outputScheme, bool isTemp = false); + /// + /// Creates an instance of Stream for a dataset at a specified location. This is + /// used by DryadLINQ to read a .NET collection from a store. + /// + /// An instance of DryadLinqContext. + /// The URI of a dataset. + /// An instance of Stream. 
public abstract Stream Egress(DryadLinqContext context, Uri dataSetUri); - public abstract void CheckExistence(DryadLinqContext context, Uri dataSetUri, bool deleteIfExists); + /// + /// Checks the existence of a specified dataset. + /// + /// The current DryadLinqContext. + /// The URI of the dataset. + /// True to delete if the dataset exists. + public abstract void CheckExistence(DryadLinqContext context, + Uri dataSetUri, + bool deleteIfExists); /// /// The path separator of this data provider. @@ -93,8 +160,14 @@ namespace Microsoft.Research.DryadLinq get { return '/'; } } - public static void Register(string scheme, DataProvider provider) + /// + /// Registers a new . This can be used to extend DryadLINQ to + /// interact with a new kind of data store. + /// + /// A new DataProvider + public static void Register(DataProvider provider) { + string scheme = provider.Scheme; if (s_providers.ContainsKey(scheme)) { throw new DryadLinqException("Data provider for " + scheme + " has already existed."); @@ -122,6 +195,7 @@ namespace Microsoft.Research.DryadLinq /// Get the dataset specified by a URI. /// /// The record type of the dataset. + /// An instance of /// The URI of the dataset /// A query object representing the dsc file set data. internal static DryadLinqQuery GetPartitionedTable(DryadLinqContext context, Uri dataSetUri) @@ -133,16 +207,32 @@ namespace Microsoft.Research.DryadLinq return new DryadLinqQuery(null, queryProvider, dataProvider, dataSetUri); } - // Egress data from store to client. + /// + /// Reads a specified dataset. + /// + /// The record type of the dataset. + /// An instance of + /// The URI of the dataset. + /// A sequence of records as IEnumerable{T}. 
public static IEnumerable ReadData(DryadLinqContext context, Uri dataSetUri) { string scheme = DataPath.GetScheme(dataSetUri); DataProvider dataProvider = DataProvider.GetDataProvider(scheme); dataSetUri = dataProvider.RewriteUri(context, dataSetUri); - return new DryadLinqQueryEnumerable(dataProvider, context, dataSetUri); + return new DryadLinqQueryEnumerable(context, dataProvider, dataSetUri); } - // Ingress any IEnumerable data. Set the lease if it is temporary + /// + /// Stores an IEnumerable{T} at a specified location. + /// + /// The record type of the data. + /// An instance of + /// The data to store. + /// The URI of the store location. + /// The metadata of the data. + /// The compression scheme. + /// true if the data is only stored temporarily. + /// An instance of IQueryable{T} for the data. internal static DryadLinqQuery StoreData(DryadLinqContext context, IEnumerable source, Uri dataSetUri, @@ -158,11 +248,26 @@ namespace Microsoft.Research.DryadLinq } } + /// + /// Basic information of a dataset. + /// public class DryadLinqStreamInfo { + /// + /// The number of partitions of the dataset. Returns -1 if unknown. + /// public Int32 PartitionCount { get; private set; } + + /// + /// The size in bytes of the dataset. Returns -1 if unknown. + /// public Int64 DataSize { get; private set; } + /// + /// Initializes an instance of DryadLinqStreamInfo. + /// + /// The number of partitions. + /// The size in bytes. 
public DryadLinqStreamInfo(Int32 parCnt, Int64 size) { this.PartitionCount = parCnt; @@ -177,23 +282,22 @@ namespace Microsoft.Research.DryadLinq get { return DataPath.HDFS_URI_SCHEME; } } - public override Uri GetTempDirectory(DryadLinqContext context) + public override Uri GetTemporaryStreamUri(DryadLinqContext context, string path) { - UriBuilder builder = new UriBuilder(this.Scheme, context.DataNameNode, context.DataNameNodeDataPort); - builder.Path = DataPath.TEMPORARY_STREAM_NAME_PREFIX; - return builder.Uri; + return context.Cluster.MakeDefaultUri(DataPath.TEMPORARY_STREAM_NAME_PREFIX + path); } public override DryadLinqMetaData GetMetaData(DryadLinqContext context, Uri dataSetUri) { - throw new DryadLinqException("TBA"); + // TBD + return null; } public override DryadLinqStreamInfo GetStreamInfo(DryadLinqContext context, Uri dataSetUri) { Int32 parCnt = 0; Int64 size = -1; - context.DfsClient.GetContentSummary(dataSetUri.AbsolutePath, ref size, ref parCnt); + context.Cluster.DfsClient.GetContentSummary(dataSetUri.AbsolutePath, ref size, ref parCnt); if (parCnt == 0) { throw new DryadLinqException("Got 0 partition count for " + dataSetUri.AbsoluteUri); @@ -221,14 +325,11 @@ namespace Microsoft.Research.DryadLinq WebHdfsClient client = new WebHdfsClient(dataSetUri.Host, 8033, 50070); if (client.IsFileExists(dataSetUri.AbsolutePath)) { - if (deleteIfExists) - { - client.DeleteDfsFile(dataSetUri.AbsolutePath); - } - else + if (!deleteIfExists) { throw new DryadLinqException("Can't output to existing HDFS collection " + dataSetUri.AbsoluteUri); } + client.DeleteDfsFile(dataSetUri.AbsolutePath); } } } @@ -240,23 +341,19 @@ namespace Microsoft.Research.DryadLinq get { return DataPath.PARTFILE_URI_SCHEME; } } - public override Uri GetTempDirectory(DryadLinqContext context) + public override Uri GetTemporaryStreamUri(DryadLinqContext context, string path) { - UriBuilder builder = new UriBuilder(); - builder.Scheme = this.Scheme; - string dataNameNode = 
context.DataNameNode; - if (String.IsNullOrEmpty(dataNameNode)) - { - dataNameNode = Environment.MachineName; - } - builder.Host = dataNameNode; - builder.Path = DataPath.TEMPORARY_STREAM_NAME_PREFIX; - return builder.Uri; + string wd = Directory.GetCurrentDirectory(); + path = Path.Combine(Path.GetPathRoot(wd), DataPath.TEMPORARY_STREAM_NAME_PREFIX, path); + Directory.CreateDirectory(Path.GetDirectoryName(path)); + Uri uri = new Uri(this.Scheme + ":///" + path); + return uri; } public override DryadLinqMetaData GetMetaData(DryadLinqContext context, Uri dataSetUri) { - throw new DryadLinqException("TBA"); + // TBD + return null; } public override DryadLinqStreamInfo GetStreamInfo(DryadLinqContext context, Uri dataSetUri) @@ -285,21 +382,22 @@ namespace Microsoft.Research.DryadLinq bool isTemp = false) { // Write the partition: - string partPath = context.PartitionUncPath; - if (partPath == null) + string partDir = context.PartitionUncPath; + if (partDir == null) { - partPath = Path.GetDirectoryName(dataSetUri.LocalPath); + partDir = Path.GetDirectoryName(dataSetUri.LocalPath); } - if (!Path.IsPathRooted(partPath)) + if (!Path.IsPathRooted(partDir)) { - partPath = Path.Combine("/", partPath); + partDir = Path.Combine("/", partDir); } - partPath = Path.Combine(partPath, DryadLinqUtil.MakeUniqueName()); - Directory.CreateDirectory(partPath); - partPath = Path.Combine(partPath, "Part.00000000"); + partDir = Path.Combine(partDir, DryadLinqUtil.MakeUniqueName()); + Directory.CreateDirectory(partDir); + string partPath = Path.Combine(partDir, "Part"); + string partFilePath = partPath + ".00000000"; DryadLinqFactory factory = (DryadLinqFactory)DryadLinqCodeGen.GetFactory(context, typeof(T)); - using (FileStream fstream = new FileStream(partPath, FileMode.CreateNew, FileAccess.Write)) + using (FileStream fstream = new FileStream(partFilePath, FileMode.CreateNew, FileAccess.Write)) { DryadLinqFileBlockStream nativeStream = new DryadLinqFileBlockStream(fstream, 
compressionScheme); DryadLinqRecordWriter writer = factory.MakeWriter(nativeStream); @@ -311,12 +409,12 @@ namespace Microsoft.Research.DryadLinq } // Write the partfile: - FileInfo finfo = new FileInfo(partPath); + FileInfo finfo = new FileInfo(partFilePath); using (StreamWriter writer = File.CreateText(dataSetUri.LocalPath)) { - writer.WriteLine("thislineisignoredbecauseoftheoverride"); + writer.WriteLine(partPath); writer.WriteLine("1"); - writer.WriteLine("{0},{1},{2}:{3}", 0, finfo.Length, Environment.MachineName, partPath.TrimStart('\\', '/')); + writer.WriteLine("{0},{1},{2}", 0, finfo.Length, Environment.MachineName); } } @@ -328,6 +426,39 @@ namespace Microsoft.Research.DryadLinq { throw new DryadLinqException("The partition file " + dataSetUri + " is malformed."); } + string[] filePathArray = this.GetPartitionPaths(lines); + return new DryadLinqMultiFileStream(filePathArray, CompressionScheme.None); + } + + public override void CheckExistence(DryadLinqContext context, Uri dataSetUri, bool deleteIfExists) + { + string fileName = dataSetUri.LocalPath; + if (File.Exists(fileName)) + { + if (!deleteIfExists) + { + throw new DryadLinqException("Can't output to existing Partitioned File collection " + dataSetUri.AbsoluteUri); + } + + // Note: We delete all the partitions! 
+ var lines = File.ReadAllLines(fileName); + try + { + foreach (string path in this.GetPartitionPaths(lines)) + { + if (File.Exists(path)) + { + File.Delete(path); + } + } + } + catch (Exception) { /*skip*/ } + File.Delete(fileName); + } + } + + private string[] GetPartitionPaths(string[] lines) + { bool isLocalPath = lines[0].Contains(':'); string[] filePathArray = new string[lines.Length - 2]; for (int i = 2; i < lines.Length; i++) @@ -348,23 +479,7 @@ namespace Microsoft.Research.DryadLinq filePathArray[idx] = String.Format(@"\\{0}\{1}.{2:X8}", fields[2], lines[0], idx); } } - return new DryadLinqMultiFileStream(filePathArray, CompressionScheme.None); - } - - public override void CheckExistence(DryadLinqContext context, Uri dataSetUri, bool deleteIfExists) - { - string fileName = dataSetUri.LocalPath; - if (File.Exists(fileName)) - { - if (deleteIfExists) - { - File.Delete(fileName); - } - else - { - throw new DryadLinqException("Can't output to existing Partitioned File collection " + dataSetUri.AbsoluteUri); - } - } + return filePathArray; } } @@ -383,15 +498,12 @@ namespace Microsoft.Research.DryadLinq get { return DataPath.AZUREBLOB_URI_SCHEME; } } - public override Uri GetTempDirectory(DryadLinqContext context) + public override Uri GetTemporaryStreamUri(DryadLinqContext context, string path) { - return AzureUtils.ToAzureUri(context.AzureAccountName, - context.AzureAccountKey(context.AzureAccountName), - context.AzureContainerName, - DataPath.TEMPORARY_STREAM_NAME_PREFIX); + return context.Cluster.MakeDefaultUri(DataPath.TEMPORARY_STREAM_NAME_PREFIX + path); } - public override Uri RewriteUri(DryadLinqContext context, Uri dataSetUri) + public override Uri RewriteUri(DryadLinqContext context, Uri dataSetUri, FileAccess access) { string account, key, container, blob; AzureUtils.FromAzureUri(dataSetUri, out account, out key, out container, out blob); @@ -404,19 +516,20 @@ namespace Microsoft.Research.DryadLinq query["key"] = context.AzureAccountKey(account); } 
- if (typeof(T) == typeof(Microsoft.Research.DryadLinq.LineRecord)) + if (access != FileAccess.Write && + typeof(T) == typeof(Microsoft.Research.DryadLinq.LineRecord)) { query["seekBoundaries"] = "Microsoft.Research.DryadLinq.LineRecord"; } builder.Query = query.ToString(); - return builder.Uri; } public override DryadLinqMetaData GetMetaData(DryadLinqContext context, Uri dataSetUri) { - throw new DryadLinqException("TBA"); + // TBD + return null; } public override DryadLinqStreamInfo GetStreamInfo(DryadLinqContext context, Uri dataSetUri) @@ -448,10 +561,27 @@ namespace Microsoft.Research.DryadLinq IEnumerable source, Uri dataSetUri, DryadLinqMetaData metaData, - CompressionScheme outputScheme, + CompressionScheme compressionScheme, bool isTemp = false) { - throw new DryadLinqException("TBA"); + string account, key, container, blob; + AzureUtils.FromAzureUri(dataSetUri, out account, out key, out container, out blob); + if (compressionScheme != CompressionScheme.None) + { + throw new DryadLinqException("Not implemented: writing to Azure temporary storage with compression enabled"); + } + AzureDfsClient client = new AzureDfsClient(account, key, container); + DryadLinqFactory factory = (DryadLinqFactory)DryadLinqCodeGen.GetFactory(context, typeof(T)); + using (Stream stream = client.GetFileStreamWriterAsync(blob).Result) + { + DryadLinqBlockStream nativeStream = new DryadLinqBlockStream(stream); + DryadLinqRecordWriter writer = factory.MakeWriter(nativeStream); + foreach (T rec in source) + { + writer.WriteRecordSync(rec); + } + writer.Close(); + } } public override Stream Egress(DryadLinqContext context, Uri dataSetUri) @@ -479,14 +609,11 @@ namespace Microsoft.Research.DryadLinq AzureCollectionPartition partition = new AzureCollectionPartition(dataSetUri); if (partition.IsCollectionExists()) { - if (deleteIfExists) - { - partition.DeleteCollection(); - } - else + if (!deleteIfExists) { throw new DryadLinqException("Can't output to existing Azure Blob collection " + 
dataSetUri.AbsoluteUri); } + partition.DeleteCollection(); } } } diff --git a/LinqToDryad/DryadLinqBinaryReader.cs b/LinqToDryad/DryadLinqBinaryReader.cs index 9294e7e..02a9092 100644 --- a/LinqToDryad/DryadLinqBinaryReader.cs +++ b/LinqToDryad/DryadLinqBinaryReader.cs @@ -32,8 +32,8 @@ using Microsoft.Research.DryadLinq.Internal; namespace Microsoft.Research.DryadLinq { /// - /// DryadLinqBinaryReader is the main interface for user provided custom serializers - /// or DL-internal autoserialization codepaths to read primitive types from a partition file. + /// DryadLinqBinaryReader is the main interface for user provided custom serializers. + /// It is also used for DryadLINQ internal autoserialization to read primitive types. /// public unsafe sealed class DryadLinqBinaryReader { @@ -79,6 +79,9 @@ namespace Microsoft.Research.DryadLinq { } + /// + /// The finalizer that frees native resources. + /// ~DryadLinqBinaryReader() { if (!this.m_isClosed) @@ -141,6 +144,10 @@ namespace Microsoft.Research.DryadLinq // // Public methods // + /// + /// Returns a string that represents this DryadLinqBinaryReader object. + /// + /// The string representation of this reader public override string ToString() { return this.m_nativeStream.ToString(); @@ -171,7 +178,7 @@ namespace Microsoft.Research.DryadLinq } /// - /// Read a byte from the current reader and advances the current position of the + /// Reads a byte from the current reader and advances the current position of the /// reader by one byte. /// /// The next byte read from the current reader. @@ -191,7 +198,7 @@ namespace Microsoft.Research.DryadLinq } /// - /// Read a signed byte from the current reader and advances the current + /// Reads a signed byte from the current reader and advances the current /// position of the reader by one byte. /// /// The next signed byte read from the current reader. 
@@ -211,7 +218,7 @@ namespace Microsoft.Research.DryadLinq } /// - /// Read a boolean value from the current reader and advances the current + /// Reads a boolean value from the current reader and advances the current /// position of the reader by one byte. /// /// true iff the byte is nonzero. @@ -232,7 +239,7 @@ namespace Microsoft.Research.DryadLinq } /// - /// Read a character from the current reader and advances the current position of the reader + /// Reads a character from the current reader and advances the current position of the reader /// according to the encoding and the character. /// /// A character read from the current reader. @@ -262,6 +269,10 @@ namespace Microsoft.Research.DryadLinq } } + /// + /// Reads a 16-bit signed integer from the current reader. + /// + /// A 16-bit signed integer. public short ReadInt16() { ushort low, high; @@ -278,6 +289,10 @@ namespace Microsoft.Research.DryadLinq return (short)(low | (high << 8)); } + /// + /// Reads a 16-bit unsigned integer from the current reader. + /// + /// A 16-bit unsigned integer. public ushort ReadUInt16() { ushort low, high; @@ -294,6 +309,10 @@ namespace Microsoft.Research.DryadLinq return (ushort)(low | (high << 8)); } + /// + /// Reads a 32-bit signed integer from the current reader. + /// + /// A 32-bit signed integer. public int ReadInt32() { int b1, b2, b3, b4; @@ -314,6 +333,11 @@ namespace Microsoft.Research.DryadLinq return (int)(b1 | b2 | b3 | b4); } + /// + /// Reads a 32-bit signed integer from the current reader. Assumes that the integer + /// is represented in the compact format as written by WriteCompact of a DryadLinqBinaryWriter. + /// + /// A 32-bit signed integer. public unsafe int ReadCompactInt32() { int b1, b2, b3, b4; @@ -341,6 +365,10 @@ namespace Microsoft.Research.DryadLinq } } + /// + /// Reads a 32-bit unsigned integer from the current reader. + /// + /// A 32-bit unsigned integer. 
public uint ReadUInt32() { int b1, b2, b3, b4; @@ -361,6 +389,10 @@ namespace Microsoft.Research.DryadLinq return (uint)(b1 | b2 | b3 | b4); } + /// + /// Reads a 64-bit signed integer from the current reader. + /// + /// A 64-bit signed integer. public long ReadInt64() { uint lo, hi; @@ -383,6 +415,10 @@ namespace Microsoft.Research.DryadLinq return (long)(((ulong)hi) << 32 | lo); } + /// + /// Reads a 64-bit unsigned integer from the current reader. + /// + /// A 64-bit unsigned integer. public ulong ReadUInt64() { uint lo, hi; @@ -405,12 +441,20 @@ namespace Microsoft.Research.DryadLinq return ((ulong)hi) << 32 | lo; } + /// + /// Reads a 32-bit floating point number from the current reader. + /// + /// A 32-bit floating point number. public float ReadSingle() { int tmp = this.ReadInt32(); return *((float*)&tmp); } + /// + /// Reads a decimal number from the current reader. + /// + /// A decimal number. public decimal ReadDecimal() { decimal val; @@ -418,6 +462,10 @@ namespace Microsoft.Research.DryadLinq return val; } + /// + /// Reads a 64-bit floating point number from the current reader. + /// + /// A 64-bit floating point number. public double ReadDouble() { ulong tmp = this.ReadUInt64(); @@ -427,12 +475,20 @@ namespace Microsoft.Research.DryadLinq private const Int64 TicksMask = 0x3FFFFFFFFFFFFFFF; private const Int32 KindShift = 62; + /// + /// Reads a value of DateTime from the current reader. + /// + /// A value of DateTime. public DateTime ReadDateTime() { UInt64 value = this.ReadUInt64(); return new DateTime((Int64)(value & TicksMask), (DateTimeKind)(value >> KindShift)); } + /// + /// Reads a value of SqlDateTime from the current reader. + /// + /// A value of SqlDateTime. public SqlDateTime ReadSqlDateTime() { int dayTicks = this.ReadInt32(); @@ -440,6 +496,10 @@ namespace Microsoft.Research.DryadLinq return new SqlDateTime(dayTicks, timeTicks); } + /// + /// Reads a value of Guid from the current reader. + /// + /// A value of Guid. 
public Guid ReadGuid() { Guid guid; @@ -453,7 +513,7 @@ namespace Microsoft.Research.DryadLinq /// /// The pre-allocated char array to read data into. /// The starting offset at which to begin reading chars into . - /// The maximum number of chars to read. Must be smaller than or equal to ( - ). + /// The maximum number of chars to read. /// The number of chars that was actually read. public unsafe int ReadChars(char[] destBuffer, int offset, int charCount) { @@ -561,6 +621,10 @@ namespace Microsoft.Research.DryadLinq return numCharsDecoded; } + /// + /// Reads a string value from the current reader. + /// + /// A string. public string ReadString() { // First read the length of the string and the number of bytes needed @@ -627,7 +691,7 @@ namespace Microsoft.Research.DryadLinq /// /// The pre-allocated byte array to read data into. /// The starting offset at which to begin reading bytes into . - /// The maximum number of bytes to read. Must be smaller than or equal to ( - ). + /// The maximum number of bytes to read. /// The number of bytes that was actually read. 
public int ReadBytes(byte[] destBuffer, int offset, int byteCount) { @@ -704,6 +768,8 @@ namespace Microsoft.Research.DryadLinq /// public helper to read into a byte*, mainly used to read preallocated fixed size, /// non-integer types (Array, Guid, decimal etc) /// + /// The pointer to the pre-allocated byte array to read data into + /// The number of bytes to read public void ReadRawBytes(byte* pBytes, int numBytes) { int numBytesRead = 0; diff --git a/LinqToDryad/DryadLinqBinaryWriter.cs b/LinqToDryad/DryadLinqBinaryWriter.cs index 77fa169..1d08b05 100644 --- a/LinqToDryad/DryadLinqBinaryWriter.cs +++ b/LinqToDryad/DryadLinqBinaryWriter.cs @@ -31,8 +31,8 @@ using Microsoft.Research.DryadLinq.Internal; namespace Microsoft.Research.DryadLinq { /// - /// DryadLinqBinaryWriter is the main interface for user provided custom serializers - /// or DL-internal autoserialization codepaths to write primitive types from a partition file. + /// DryadLinqBinaryWriter is the main interface for user provided custom serializers. + /// It is also used for DryadLINQ internal autoserialization to write primitive types. /// public unsafe sealed class DryadLinqBinaryWriter { @@ -45,8 +45,8 @@ namespace Microsoft.Research.DryadLinq private DataBlockInfo m_curDataBlockInfo; private byte* m_curDataBlock; // The current write buffer. This is allocated from the native stream, - // individual WriteXXX methods serialize primitives into this buffer, - // and it gets written out when it's full + // individual WriteXXX methods serialize primitives into this buffer, + // and it gets written out when it's full private Int32 m_curBlockSize; // Size of the current write buffer. private Int32 m_curRecordStart; private Int32 m_curRecordEnd; @@ -96,6 +96,9 @@ namespace Microsoft.Research.DryadLinq { } + /// + /// Frees native resources. 
+ /// ~DryadLinqBinaryWriter() { if (!this.m_isClosed) @@ -250,6 +253,10 @@ namespace Microsoft.Research.DryadLinq // // Public methods // + /// + /// Writes an unsigned byte to the current writer. + /// + /// The unsigned byte to be written. public void Write(byte b) { if (this.m_curRecordEnd == this.m_curBlockSize) @@ -259,6 +266,10 @@ namespace Microsoft.Research.DryadLinq this.m_curDataBlock[this.m_curRecordEnd++] = b; } + /// + /// Writes a signed byte to the current writer. + /// + /// The signed byte to be written. public void Write(sbyte b) { if (this.m_curRecordEnd == this.m_curBlockSize) @@ -268,6 +279,10 @@ namespace Microsoft.Research.DryadLinq this.m_curDataBlock[this.m_curRecordEnd++] = (byte)b; } + /// + /// Writes a boolean value to the current writer. + /// + /// The boolean value to be written. public void Write(bool b) { if (this.m_curRecordEnd == this.m_curBlockSize) @@ -277,6 +292,10 @@ namespace Microsoft.Research.DryadLinq this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(b ? 1 : 0); } + /// + /// Writes a character to the current writer. + /// + /// The character to be written. public void Write(char ch) { if (this.m_curBlockSize - this.m_curRecordEnd < this.m_charMaxByteCount) @@ -288,6 +307,10 @@ namespace Microsoft.Research.DryadLinq this.m_curRecordEnd += numBytes; } + /// + /// Writes a signed 16-bit integer to the current writer. + /// + /// The signed 16-bit integer to be written. public void Write(short val) { if (this.m_curBlockSize - this.m_curRecordEnd < 2) @@ -298,6 +321,10 @@ namespace Microsoft.Research.DryadLinq this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 8); } + /// + /// Writes an unsigned 16-bit integer to the current writer. + /// + /// The unsigned 16-bit integer to be written. 
public void Write(ushort val) { if (this.m_curBlockSize - this.m_curRecordEnd < 2) @@ -308,6 +335,10 @@ namespace Microsoft.Research.DryadLinq this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 8); } + /// + /// Writes a signed 32-bit integer to the current writer. + /// + /// The signed 32-bit integer to be written. public void Write(int val) { if (this.m_curBlockSize - this.m_curRecordEnd < 4) @@ -320,6 +351,11 @@ namespace Microsoft.Research.DryadLinq this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 24); } + /// + /// Writes a 32-bit signed integer to the current writer. The integer + /// is written in a compact format. + /// + /// The integer to be written. public void WriteCompact(int val) { if (this.m_curBlockSize - this.m_curRecordEnd < 4) @@ -348,6 +384,7 @@ namespace Microsoft.Research.DryadLinq { this.m_curDataBlock[loc++] = (byte)val; this.m_curDataBlock[loc++] = (byte)(val >> 8); + this.m_curDataBlock[loc++] = (byte)(val >> 16); this.m_curDataBlock[loc++] = (byte)(val >> 24); } @@ -367,6 +404,10 @@ namespace Microsoft.Research.DryadLinq } } + /// + /// Writes an unsigned 32-bit integer to the current writer. + /// + /// The unsigned 32-bit integer to be written. public void Write(uint val) { if (this.m_curBlockSize - this.m_curRecordEnd < 4) @@ -379,6 +420,10 @@ namespace Microsoft.Research.DryadLinq this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 24); } + /// + /// Writes a signed 64-bit integer to the current writer. + /// + /// The signed 64-bit integer to write. public void Write(long val) { if (this.m_curBlockSize - this.m_curRecordEnd < 8) @@ -395,6 +440,10 @@ namespace Microsoft.Research.DryadLinq this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 56); } + /// + /// Writes an unsigned 64-bit integer to the current writer. + /// + /// The unsigned 64-bit integer to write.
public void Write(ulong val) { if (this.m_curBlockSize - this.m_curRecordEnd < 8) @@ -411,42 +460,70 @@ namespace Microsoft.Research.DryadLinq this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 56); } - public void Write(decimal val) - { - this.WriteRawBytes((byte*)&val, sizeof(decimal)); - } - + /// + /// Writes a 32-bit floating point number to the current writer. + /// + /// A 32-bit floating point number to write. public void Write(float val) { uint tmpVal = *(uint*)&val; this.Write(tmpVal); } + /// + /// Writes a 64-bit floating point number to the current writer. + /// + /// A 64-bit floating point number to write. public void Write(double val) { ulong tmpVal = *(ulong*)&val; this.Write(tmpVal); } + /// + /// Writes a decimal number to the current writer. + /// + /// A decimal value to write + public void Write(decimal val) + { + this.WriteRawBytes((byte*)&val, sizeof(decimal)); + } + private const Int32 KindShift = 62; + /// + /// Writes a value of DateTime to the current writer. + /// + /// A value of DateTime to write. public void Write(DateTime val) { UInt64 tempVal = (UInt64)val.Ticks | (((UInt64)val.Kind) << KindShift); this.Write(tempVal); } + /// + /// Writes a value of SqlDateTime to the current writer. + /// + /// A value of SqlDateTime to write. public void Write(SqlDateTime val) { this.Write(val.DayTicks); this.Write(val.TimeTicks); } + /// + /// Writes a value of Guid to the current writer. + /// + /// The value of Guid. public void Write(Guid guid) { WriteRawBytes((byte*)&guid, sizeof(Guid)); } + /// + /// Writes a string to the current writer. + /// + /// The string to write. public void Write(string val) { Int32 len = val.Length; @@ -472,6 +549,12 @@ namespace Microsoft.Research.DryadLinq this.WriteCompact(numBytes, compactSize, buffLoc); } + /// + /// Writes an array of characters to the current writer. + /// + /// The array of characters. + /// The starting index. + /// The number of characters to write. 
public void WriteChars(char[] charBuffer, int offset, int charCount) { if (charBuffer == null) @@ -515,6 +598,12 @@ namespace Microsoft.Research.DryadLinq this.m_curRecordEnd += numBytes; } + /// + /// Writes an array of bytes to the current writer. + /// + /// The byte array to write. + /// The starting index. + /// The number of bytes to write. public void WriteBytes(byte[] byteBuffer, int offset, int byteCount) { if (byteBuffer == null) @@ -551,6 +640,8 @@ namespace Microsoft.Research.DryadLinq /// Public helper to write from a caller provided byte* to the output stream. /// This is mainly used to read preallocated fixed size, non-integer types (Guid, decimal etc). /// + /// A pointer to the byte array to write + /// The number of bytes to write public void WriteRawBytes(byte* pBytes, Int32 numBytes) { while (this.m_curBlockSize - this.m_curRecordEnd < numBytes) @@ -561,6 +652,10 @@ namespace Microsoft.Research.DryadLinq this.m_curRecordEnd += numBytes; } + /// + /// Returns a string that represents the current DryadLinqBinaryWriter object. 
+ /// + /// The string representation of this writer public override string ToString() { return this.m_nativeStream.ToString(); diff --git a/LinqToDryad/DryadLinqBlockStream.cs b/LinqToDryad/DryadLinqBlockStream.cs index 849d205..06e45cc 100644 --- a/LinqToDryad/DryadLinqBlockStream.cs +++ b/LinqToDryad/DryadLinqBlockStream.cs @@ -145,7 +145,7 @@ namespace Microsoft.Research.DryadLinq.Internal // options |= FileOptions.Asynchronous; this.m_fstream = new FileStream(filePath, mode, access, FileShare.Read, DefaultBuffSize, options); } - catch(Exception e) + catch (Exception e) { throw new DryadLinqException(DryadLinqErrorCode.CannotAccesFilePath, String.Format(SR.CannotAccesFilePath , filePath),e); diff --git a/LinqToDryad/DryadLinqCodeGen.cs b/LinqToDryad/DryadLinqCodeGen.cs index 0958734..7df6ea9 100644 --- a/LinqToDryad/DryadLinqCodeGen.cs +++ b/LinqToDryad/DryadLinqCodeGen.cs @@ -38,13 +38,16 @@ namespace Microsoft.Research.DryadLinq.Internal { using CodeStmtPair = Pair; - // This class generates and compiles the managed code executed by DryadLinq. - // It creates a managed library (DLL) that gets used by DryadLinq vertices. - // - // For each type of DryadLinq vertex node, we need to call - // AddDryadCodeForType(node.OutputType); - // AddVertexMethod(node); - // This should generate all the code described in the note. + /// + /// This class generates and compiles the managed code executed by DryadLinq. + /// It creates a managed library (DLL) that contains the entry method for each + /// DryadLINQ vertex. + /// + /// For each type of DryadLINQ vertex node, we need to call + /// AddDryadCodeForType(node.OutputType); + /// AddVertexMethod(node); + /// + /// A DryadLINQ user should not use this class directly. 
public class DryadLinqCodeGen { private const BindingFlags FieldFlags = BindingFlags.Instance|BindingFlags.Public|BindingFlags.NonPublic; @@ -54,7 +57,7 @@ namespace Microsoft.Research.DryadLinq.Internal private const string TargetDllName = "Microsoft.Research.DryadLinq_.dll"; private const string VertexSourceFile = "Microsoft.Research.DryadLinq_.cs"; private const string DummyExtensionSourceFile = "DryadLinqExtensionBase.cs"; - private const string VertexParamName = "vertexparam"; + private const string VertexParamName = "vertexParams"; private const string HelperClassName = "DryadLinqHelper"; private const string DebugHelperMethodName = "CheckVertexDebugRequest"; private const string CopyResourcesMethodName = "CopyResources"; @@ -2308,6 +2311,13 @@ namespace Microsoft.Research.DryadLinq.Internal this.GenerateCodeAndCompile(dummyFile, srcFile, targetName, loadGeneratedAssembly); } + /// + /// Gets the for a specified type. If a factory doesn't exist, + /// the method generates the serialization code and creates a new factory for the type. + /// + /// An instance of + /// A specified type + /// A for the type public static object GetFactory(DryadLinqContext context, Type type) { lock (s_codeGenLock) diff --git a/LinqToDryad/DryadLinqCollection.cs b/LinqToDryad/DryadLinqCollection.cs index c2b6fd0..a179e04 100644 --- a/LinqToDryad/DryadLinqCollection.cs +++ b/LinqToDryad/DryadLinqCollection.cs @@ -24,47 +24,84 @@ using System.Collections.Generic; using System.Linq; using Microsoft.Research.DryadLinq; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { + /// + /// A value with an associated integer index. + /// + /// [Serializable] public struct IndexedValue : IEquatable>, IComparable> { private int _index; private T _value; - public int Index - { - get { return _index; } - set { _index = value; } - } - - public T Value - { - get { return _value; } - set { _value = value; } - } - + /// + /// Initializes a new instance of IndexedValue.
+ /// + /// The index. + /// The value public IndexedValue(int index, T value) { _index = index; _value = value; } + /// + /// Gets and sets the index of the value. + /// + public int Index + { + get { return _index; } + set { _index = value; } + } + + /// + /// Gets and sets the value. + /// + public T Value + { + get { return _value; } + set { _value = value; } + } + + /// + /// Determines whether the specified IndexedValue is equal to the current IndexedValue. + /// + /// The IndexedValue to compare with. + /// true if the specified IndexedValue is equal to the current IndexedValue. public bool Equals(IndexedValue val) { return this.Index == val.Index; } + /// + /// Compares this IndexedValue with another instance of IndexedValue. Returns an integer that + /// indicates the ordering. + /// + /// The IndexedValue to compare with. + /// An integer that indicates the ordering. public int CompareTo(IndexedValue val) { return this.Index - val.Index; } + /// + /// Use the index as the hash function. + /// + /// An integer hashcode public override int GetHashCode() { return this.Index; } + /// + /// Determines whether the specified object is equal to the current IndexedValue. + /// + /// The object to compare with. + /// true if the specified object is equal to the current IndexedValue. public override bool Equals(object obj) { if (!(obj is IndexedValue)) @@ -74,11 +111,23 @@ namespace Microsoft.Research.DryadLinq.Internal return this.Equals((IndexedValue)obj); } + /// + /// Determines whether two instances of IndexedValue are equal. + /// + /// The left IndexedValue. + /// The right IndexedValue. + /// true if the two values are equal. public static bool operator ==(IndexedValue a, IndexedValue b) { return a.Equals(b); } + /// + /// Determines whether two instances of IndexedValue are not equal. + /// + /// The left IndexedValue. + /// The right IndexedValue. + /// true if the two values are not equal. 
public static bool operator !=(IndexedValue a, IndexedValue b) { return !a.Equals(b); diff --git a/LinqToDryad/DryadLinqContext.cs b/LinqToDryad/DryadLinqContext.cs index bd913f3..944c6fc 100644 --- a/LinqToDryad/DryadLinqContext.cs +++ b/LinqToDryad/DryadLinqContext.cs @@ -26,6 +26,8 @@ using System.Diagnostics; using System.Linq.Expressions; using System.Reflection; using System.IO; +using System.Security.Cryptography.X509Certificates; +using System.Threading.Tasks; using Microsoft.Research.DryadLinq.Internal; using Microsoft.Research.Peloponnese.ClusterUtils; using Microsoft.Research.Peloponnese.Storage; @@ -33,10 +35,13 @@ using Microsoft.Research.Peloponnese.Storage; namespace Microsoft.Research.DryadLinq { /// - /// We currently support two schedulers. + /// The executor to run DryadLINQ jobs. The current release only supports Dryad. /// public enum ExecutorKind { + /// + /// Run DryadLINQ using Dryad. + /// DRYAD } @@ -46,9 +51,277 @@ namespace Microsoft.Research.DryadLinq public enum PlatformKind { /// - /// run on a YARN cluster (not yet implemented) + /// run directly on a YARN cluster /// - YARN + YARN_NATIVE, + + /// + /// run on a YARN cluster in Azure HDInsight + /// + YARN_AZURE, + + /// + /// run locally at client side + /// + LOCAL + } + + /// + /// Base interface for cluster types that the DryadLinqContext constructor can accept. + /// + public interface DryadLinqCluster + { + /// + /// Gets the service platform of this cluster. + /// + PlatformKind Kind { get; } + /// + /// Gets the hostname of the head node of the cluster. + /// + string HeadNode { get; } + /// + /// Gets the client DFS interface. + /// + IDfsClient DfsClient { get; } + /// + /// Gets the client cluster interface. + /// + /// An instance of DryadLinqContext + /// The client interface to the cluster + ClusterClient Client(DryadLinqContext context); + /// + /// Makes a new unique URI for storing a dataset in the DFS.
+ /// + /// A user provided local path + /// A new unique URI that can be used to store a dataset + Uri MakeDefaultUri(string path); + } + + /// + /// The interface for a YARN native cluster. + /// + internal class DryadLinqYarnCluster : DryadLinqCluster + { + /// + /// The hostname of the computer where the YarnLauncher program is running + /// + public string HeadNode { get; set; } + /// + /// The port where the YarnLauncher program is listening + /// + public int LauncherPort; + /// + /// The hostname of the computer where the default HDFS instance is running + /// + public string NameNode; + /// + /// The port that the Hdfs protocol is listening on + /// + public int HdfsPort; + /// + /// The port that the WebHdfs protocol is listening on + /// + public int WebHdfsPort; + + private WebHdfsClient _dfsClient; + private NativeYarnClient _clusterClient; + + /// + /// Make a new cluster object representing a YARN cluster with default ports + /// + /// The computer where the YarnLauncher is running + public DryadLinqYarnCluster(string headNode) + { + HeadNode = headNode; + LauncherPort = 8471; + + NameNode = headNode; + HdfsPort = 9000; + WebHdfsPort = 50070; + + _dfsClient = null; + _clusterClient = null; + } + + public PlatformKind Kind { get { return PlatformKind.YARN_NATIVE; } } + public IDfsClient DfsClient { + get + { + if (_dfsClient == null) + { + _dfsClient = new WebHdfsClient(HeadNode, HdfsPort, WebHdfsPort); + } + return _dfsClient; + } + } + + public ClusterClient Client(DryadLinqContext context) + { + if (_clusterClient == null) + { + _clusterClient = new NativeYarnClient(HeadNode, HdfsPort, LauncherPort); + } + return _clusterClient; + } + + public Uri MakeDefaultUri(string path) + { + return _dfsClient.MakeDfsUri(path); + } + } + + /// + /// The interface for a YARN Azure cluster. 
+ /// + internal class DryadLinqAzureCluster : DryadLinqCluster + { + /// + /// The name of the HDInsight cluster + /// + public string HeadNode { get { return _cluster.Result.Name; } } + + private readonly AzureSubscriptions _azureSubscriptions; + private readonly Task _cluster; + private readonly Task _dfsClient; + private Task _clusterClient; + + /// + /// Make a new cluster object representing an Azure HDInsight cluster, reading the details + /// from a subscription stored in the Powershell defaults. + /// + /// The name of the HDInsight cluster + public DryadLinqAzureCluster(string clusterName) + { + // start fetching details about the subscriptions, available clusters, etc. + _azureSubscriptions = new AzureSubscriptions(); + _cluster = _azureSubscriptions.GetClusterAsync(clusterName); + _dfsClient = _cluster.ContinueWith(c => new AzureDfsClient(c.Result.StorageAccount, c.Result.StorageKey, "staging")); + } + + /// + /// Make a new cluster object representing an Azure HDInsight cluster, specifying the details + /// manually. + /// + /// The name of the HDInsight cluster + /// The storage account to use for staging job resources + /// The storage account container to use for staging job resources + /// The storage account key, which will be looked up in the subscription if null + public DryadLinqAzureCluster(string clusterName, string storageAccount, string storageContainer, string storageKey = null) + { + // start fetching details about the subscriptions, available clusters, etc. 
+ _azureSubscriptions = new AzureSubscriptions(); + if (storageKey != null) + { + _azureSubscriptions.AddAccount(storageAccount, storageKey); + } + _cluster = _azureSubscriptions.GetClusterAsync(clusterName) + .ContinueWith(t => { t.Result.SetStorageAccount(storageAccount, storageKey); return t.Result; }); + _dfsClient = _cluster.ContinueWith(c => new AzureDfsClient(c.Result.StorageAccount, c.Result.StorageKey, storageContainer)); + } + + /// + /// Make a new cluster object representing an Azure HDInsight cluster, specifying the details + /// manually. + /// + /// The name of the HDInsight cluster + /// The ID of the subscription to fetch cluster details from + /// The thumbprint of the certificate associated with the subscription + public DryadLinqAzureCluster(string clusterName, string subscriptionId, string certificateThumbprint) + { + // start fetching details about the subscriptions, available clusters, etc. + _azureSubscriptions = new AzureSubscriptions(); + _azureSubscriptions.AddSubscription(subscriptionId, certificateThumbprint); + _cluster = _azureSubscriptions.GetClusterAsync(clusterName); + _dfsClient = _cluster.ContinueWith(c => new AzureDfsClient(c.Result.StorageAccount, c.Result.StorageKey, "staging")); + } + + /// + /// Make a new cluster object representing an Azure HDInsight cluster, specifying the details + /// manually. + /// + /// The name of the HDInsight cluster + /// The ID of the subscription to fetch cluster details from + /// The certificate associated with the subscription + public DryadLinqAzureCluster(string clusterName, string subscriptionId, X509Certificate2 certificate) + { + // start fetching details about the subscriptions, available clusters, etc. 
+ _azureSubscriptions = new AzureSubscriptions(); + _azureSubscriptions.AddSubscription(subscriptionId, certificate); + _cluster = _azureSubscriptions.GetClusterAsync(clusterName); + _dfsClient = _cluster.ContinueWith(c => new AzureDfsClient(c.Result.StorageAccount, c.Result.StorageKey, "staging")); + } + + /// + /// Make a new cluster object representing an Azure HDInsight cluster, specifying the details + /// manually. + /// + /// The name of the HDInsight cluster + /// The ID of the subscription to fetch cluster details from + /// The thumbprint of the certificate associated with the subscription + /// The storage account to use for staging job resources + /// The storage account container to use for staging job resources + /// The storage account key, which will be looked up in the subscription if null + public DryadLinqAzureCluster(string clusterName, string subscriptionId, string certificateThumbprint, + string storageAccount, string storageContainer, string storageKey = null) + { + // start fetching details about the subscriptions, available clusters, etc. 
+ _azureSubscriptions = new AzureSubscriptions(); + if (storageKey != null) + { + _azureSubscriptions.AddAccount(storageAccount, storageKey); + } + _azureSubscriptions.AddCluster(clusterName, storageAccount, storageKey, subscriptionId, certificateThumbprint); + _cluster = _azureSubscriptions.GetClusterAsync(clusterName); + _dfsClient = _cluster.ContinueWith(c => new AzureDfsClient(c.Result.StorageAccount, c.Result.StorageKey, storageContainer)); + } + + /// + /// Make a new cluster object representing an Azure HDInsight cluster, specifying the details + /// manually + /// + /// The name of the HDInsight cluster + /// The ID of the subscription to fetch cluster details from + /// The certificate associated with the subscription + /// The storage account to use for staging job resources + /// The storage account container to use for staging job resources + /// The storage account key, which will be looked up in the subscription if null + public DryadLinqAzureCluster(string clusterName, string subscriptionId, X509Certificate2 certificate, + string storageAccount, string storageContainer, string storageKey = null) + { + // start fetching details about the subscriptions, available clusters, etc. 
+ _azureSubscriptions = new AzureSubscriptions(); + if (storageKey != null) + { + _azureSubscriptions.AddAccount(storageAccount, storageKey); + } + _azureSubscriptions.AddCluster(clusterName, storageAccount, storageKey, subscriptionId, certificate); + _cluster = _azureSubscriptions.GetClusterAsync(clusterName); + _dfsClient = _cluster.ContinueWith( + c => new AzureDfsClient(c.Result.StorageAccount, c.Result.StorageKey, storageContainer)); + } + + public PlatformKind Kind { get { return PlatformKind.YARN_AZURE; } } + + internal AzureSubscriptions Subscriptions { get { return _azureSubscriptions; } } + + public AzureCluster Cluster { get { return _cluster.Result; } } + + public IDfsClient DfsClient { get { return _dfsClient.Result; } } + + public ClusterClient Client(DryadLinqContext context) + { + if (_clusterClient == null) + { + _clusterClient = _dfsClient.ContinueWith( + c => new AzureYarnClient(_azureSubscriptions, c.Result, context.PeloponneseHomeDirectory, Cluster.Name)); + } + return _clusterClient.Result; + } + + public Uri MakeDefaultUri(string path) + { + return AzureUtils.ToAzureUri(_dfsClient.Result.AccountName, _dfsClient.Result.ContainerName, path, null, _dfsClient.Result.AccountKey); + } } /// @@ -70,28 +343,14 @@ namespace Microsoft.Research.DryadLinq /// public class DryadLinqContext : IDisposable, IEquatable { - private const int DscNameNodeDataPort = 6498; //TODO: Read Config - private const int HdfsNameNodeHttpPort = 8033; //TODO: Read Config - private const int HdfsNameNodeDataPort = 9000; //TODO: Read Config - private ExecutorKind _executorKind = ExecutorKind.DRYAD; - private PlatformKind _platformKind = PlatformKind.YARN; + private PlatformKind _platformKind = PlatformKind.LOCAL; private string _headNode; - private string _dataNameNode; - private DryadLinqQueryRuntime _runtime; - private DscService _dscService; - private IDfsClient _dfsClient; - private ClusterClient _clusterClient; - private int _dataNameNodeDataPort; - private int 
_dataNameNodeHttpPort; - - private string _azureAccountName; - private Dictionary _azureAccountKeyDictionary; - private string _azureContainerName; + private DryadLinqCluster _clusterDetails; + private AzureSubscriptions _azureSubscriptions; private Version _clientVersion; - private Version _serverVersion; private CompressionScheme _intermediateDataCompressionScheme = CompressionScheme.None; private CompressionScheme _outputCompressionScheme = CompressionScheme.None; @@ -117,7 +376,6 @@ namespace Microsoft.Research.DryadLinq private bool _multiThreading = true; private string _partitionUncPath = null; private string _storageSetScheme = null; - private Uri _tempDatasetDirectory = null; private DryadLinqStringDictionary _jobEnvironmentVariables = new DryadLinqStringDictionary(); private DryadLinqStringList _resourcesToAdd = new DryadLinqStringList(); private DryadLinqStringList _resourcesToRemove = new DryadLinqStringList(); @@ -127,132 +385,117 @@ namespace Microsoft.Research.DryadLinq private string _dryadHome; private string _peloponneseHome; + private static DryadLinqCluster MakeCluster(string clusterName, PlatformKind kind) + { + if (kind == PlatformKind.LOCAL) + { + throw new DryadLinqException("Can't make a cluster of kind LOCAL"); + } + else if (kind == PlatformKind.YARN_NATIVE) + { + return new DryadLinqYarnCluster(clusterName); + } + else if (kind == PlatformKind.YARN_AZURE) + { + return new DryadLinqAzureCluster(clusterName); + } + else + { + throw new DryadLinqException("Unknown cluster kind " + kind); + } + } + /// /// Initializes a new instance of the DryadLinqContext class for local execution. /// /// The number of local worker processes that should be started. + /// The default scheme for storage. 
Defaults to partitioned file public DryadLinqContext(int numProcesses, string storageSetScheme = null) { - this._platformKind = PlatformKind.YARN; - this._runtime = new DryadLinqQueryRuntime(this._headNode); + this.CommonInit(); + this._platformKind = PlatformKind.LOCAL; this._localExecution = true; - this._headNode = String.Empty; - this._dataNameNode = null; + this._headNode = "LocalExecution"; this._storageSetScheme = storageSetScheme; if (String.IsNullOrEmpty(this._storageSetScheme)) { this._storageSetScheme = DataPath.PARTFILE_URI_SCHEME; } - DataProvider dataProvider = DataProvider.GetDataProvider(_storageSetScheme); - this._tempDatasetDirectory = dataProvider.GetTempDirectory(this); this._jobMinNodes = numProcesses; - this._dataNameNodeDataPort = HdfsNameNodeDataPort; - this._dataNameNodeHttpPort = HdfsNameNodeHttpPort; - CommonInit(); + // make an Azure subscriptions object just in case we want to access azure streams from local execution + this._azureSubscriptions = new AzureSubscriptions(); } /// /// Initializes a new instance of the DryadLinqContext class for a YARN cluster. /// - /// The head node of the cluster and DFS. - public DryadLinqContext(string headNode, PlatformKind platform = PlatformKind.YARN) - : this(headNode, headNode, platform) + /// The head node of the cluster and DFS + /// The service platform to run DryadLINQ jobs. Defaults to YARN Azure + public DryadLinqContext(string clusterName, PlatformKind platform = PlatformKind.YARN_AZURE) + : this(MakeCluster(clusterName, platform)) { } /// - /// Initializes a new instance of the DryadLinqContext class for a YARN cluster. + /// Initializes a new instance of the DryadLinqContext class for a specified cluster. /// - /// The head node of YARN cluster used to execute LINQ queries. - /// The namenode for the HDFS. 
- /// The cluster platform - public DryadLinqContext(string headNode, string hdfsNameNode, PlatformKind platform = PlatformKind.YARN) + /// The cluster to run DryadLINQ jobs + public DryadLinqContext(DryadLinqCluster cluster) { // Verify that the head node is set - if (String.IsNullOrEmpty(headNode)) + if (String.IsNullOrEmpty(cluster.HeadNode)) { throw new DryadLinqException(DryadLinqErrorCode.ClusterNameMustBeSpecified, SR.ClusterNameMustBeSpecified); } - CommonInit(); - this._platformKind = platform; - this._runtime = new DryadLinqQueryRuntime(headNode); - this._headNode = headNode; - this._dataNameNode = hdfsNameNode; - this._dataNameNodeDataPort = HdfsNameNodeDataPort; - this._dataNameNodeHttpPort = HdfsNameNodeHttpPort; - this._storageSetScheme = DataPath.HDFS_URI_SCHEME; - DataProvider dataProvider = DataProvider.GetDataProvider(_storageSetScheme); - this._tempDatasetDirectory = dataProvider.GetTempDirectory(this); - this._dfsClient = new Peloponnese.Storage.WebHdfsClient(hdfsNameNode, this._dataNameNodeDataPort, 50070); - this._clusterClient = new Peloponnese.ClusterUtils.NativeYarnClient( - hdfsNameNode, this._dataNameNodeDataPort); - } - /// - /// Initializes a new instance of the DryadLinqContext class for Azure - /// - public DryadLinqContext(string accountName, string accountKey, string containerName, - string clusterName = null, string subscriptionId = null, string certificateThumbprint = null) - { - // Verify that the head node is set - if (String.IsNullOrEmpty(containerName)) + this.CommonInit(); + this._platformKind = cluster.Kind; + this._headNode = cluster.HeadNode; + this._clusterDetails = cluster; + + if (cluster.Kind == DryadLinq.PlatformKind.YARN_NATIVE) { - throw new DryadLinqException(DryadLinqErrorCode.ClusterNameMustBeSpecified, - SR.ClusterNameMustBeSpecified); + this._storageSetScheme = DataPath.HDFS_URI_SCHEME; + // make an Azure subscriptions object just in case we want to access azure streams from the native yarn cluster + 
this._azureSubscriptions = new AzureSubscriptions(); + } + else if (cluster.Kind == DryadLinq.PlatformKind.YARN_AZURE) + { + this._storageSetScheme = DataPath.AZUREBLOB_URI_SCHEME; + DryadLinqAzureCluster azureCluster = cluster as DryadLinqAzureCluster; + this._azureSubscriptions = azureCluster.Subscriptions; } - CommonInit(); - this._platformKind = PlatformKind.YARN; - this._runtime = new DryadLinqQueryRuntime(containerName); - this._headNode = string.Empty; - this._storageSetScheme = DataPath.AZUREBLOB_URI_SCHEME; - this._azureAccountName = accountName; - this._azureAccountKeyDictionary = new Dictionary(); - this._azureAccountKeyDictionary.Add(this._azureAccountName, accountKey); - this._azureContainerName = containerName; - DataProvider dataProvider = DataProvider.GetDataProvider(_storageSetScheme); - this._tempDatasetDirectory = dataProvider.GetTempDirectory(this); - AzureDfsClient dfsClient = new Peloponnese.Storage.AzureDfsClient(accountName, accountKey, containerName); - _dfsClient = dfsClient; - _clusterClient = new Peloponnese.ClusterUtils.AzureYarnClient( - dfsClient, this.PeloponneseHomeDirectory, clusterName, - subscriptionId, certificateThumbprint); } private void CommonInit() { this._peloponneseHome = Peloponnese.ClusterUtils.ConfigHelpers.GetPPMHome(null); - this._dryadHome = GetDryadHome(); - } - - private string GetDryadHome() - { - string dryadHome = Environment.GetEnvironmentVariable(StaticConfig.DryadHomeVar); - - if (dryadHome == null) + if (Microsoft.Research.Peloponnese.ClusterUtils.ConfigHelpers.RunningFromNugetPackage) { - if (Microsoft.Research.Peloponnese.ClusterUtils.ConfigHelpers.RunningFromNugetPackage) - { - dryadHome = Microsoft.Research.Peloponnese.ClusterUtils.ConfigHelpers.GetPPMHome(null); - } - else - { - throw new ApplicationException("Cannot find Dryad home directory; must define " + StaticConfig.DryadHomeVar); - } + this._dryadHome = Microsoft.Research.Peloponnese.ClusterUtils.ConfigHelpers.GetPPMHome(null); + } + else + { + 
this._dryadHome = Environment.GetEnvironmentVariable(StaticConfig.DryadHomeVar); } - - return dryadHome; } + /// + /// Gets and sets the job executor. The current release only supports Dryad. + /// public ExecutorKind ExecutorKind { get { return this._executorKind; } set { _executorKind = value; } } + /// + /// Gets or sets the service platform + /// public PlatformKind PlatformKind { - get { return this._platformKind; } + get { return _platformKind; } set { _platformKind = value; } } @@ -322,116 +565,23 @@ namespace Microsoft.Research.DryadLinq set { _headNode = value; } } - /// - /// Gets the DscService associated with this DryadLinqContext. - /// - public DscService DscService - { - get - { - ThrowIfDisposed(); - return _dscService; - } - } - - /// - /// Gets the DfsClient associated with this HpcLinqContext. - /// - public IDfsClient DfsClient - { - get - { - ThrowIfDisposed(); - return _dfsClient; - } - } - - /// - /// Gets the ClusterClient associated with this HpcLinqContext. - /// - public ClusterClient ClusterClient - { - get - { - ThrowIfDisposed(); - return _clusterClient; - } - } - - /// - /// Gets or sets the namenode for the data store. - /// - public string DataNameNode - { - get { return _dataNameNode; } - set { _dataNameNode = value; } - } - - /// - /// Gets or sets the HTTP port used by the namenode for the HDFS. - /// - public int DataNameNodeDataPort - { - get { return _dataNameNodeDataPort; } - set { _dataNameNodeDataPort = value; } - } - - /// - /// Gets or sets the HTTP port used by the namenode for the HDFS. - /// - public int DataNameNodeHttpPort - { - get { return _dataNameNodeHttpPort; } - set { _dataNameNodeHttpPort = value; } - } - - /// - /// Gets or sets the account name for Azure. 
- /// - public string AzureAccountName - { - get { return _azureAccountName; } - set { _azureAccountName = value; } - } - - /// - /// Registers a key for an Azure account - /// - public void RegisterAzureAccountKey(string accountName, string accountKey) - { - _azureAccountKeyDictionary[accountName] = accountKey; - } - - /// - /// Retrieves the key for an azure account - /// - public string AzureAccountKey(string accountName) - { - if (!_azureAccountKeyDictionary.ContainsKey(accountName)) - { - return null; - } - return _azureAccountKeyDictionary[accountName]; - } - - /// - /// Gets or sets the container name for Azure. - /// - public string AzureContainerName - { - get { return _azureContainerName; } - set { _azureContainerName = value; } - } - /// /// Gets or sets the partition UNC path used when constructing a partitioned table. - /// + /// public string PartitionUncPath { get { return _partitionUncPath; } set { _partitionUncPath = value; } } + /// + /// Gets the cluster object used to run the DryadLINQ query + /// + internal DryadLinqCluster Cluster + { + get { return _clusterDetails; } + } + /// /// Gets the collection of environment variables associated with the DryadLINQ job. /// @@ -443,10 +593,6 @@ namespace Microsoft.Research.DryadLinq /// /// Gets or sets the descriptive name used to describe the DryadLINQ job. /// - /// - /// The default is null (no name). May be overriden by cluster settings such as node templates. - /// This property can be altered even when is true. 
- /// public string JobFriendlyName { get { return _jobFriendlyName; } @@ -553,36 +699,21 @@ namespace Microsoft.Research.DryadLinq set { _localExecution = value; } } + /// + /// Gets and sets the value specifying whether a vertex should break into the debugger + /// public bool DebugBreak { - get { return this.JobEnvironmentVariables.ContainsKey("DLINQ_DEBUGVERTEX"); } + get + { + return this.JobEnvironmentVariables.ContainsKey("DLINQ_DEBUGVERTEX"); + } set { this.JobEnvironmentVariables["DLINQ_DEBUGVERTEX"] = "BREAK"; } } - /// - /// Gets or sets the value specifying the platform for this query. - /// - /// - /// - /// If YARN, the query will execute on a YARN cluster. - /// - /// - /// LOCAL mode is determined by the flag _localExecution, and is mutually exclusive - /// with LocalDebug. If LOCAL, the query will execute with DryadLinq on processes - /// spawned on the local machine. This mode is particularly useful for debugging - /// interactions between processes. - /// - /// The default is YARN. - /// - public PlatformKind Platform - { - get { return _platformKind; } - set { _platformKind = value; } - } - /// /// Get the list of resources to add to the DryadLINQ job. /// @@ -722,19 +853,10 @@ namespace Microsoft.Research.DryadLinq set { _forceGC = value; } } - // internal: the runtime associated with this DryadLinqContext. 
- internal DryadLinqQueryRuntime Runtime - { - get - { - ThrowIfDisposed(); - return _runtime; - } - } - /// /// Version of the DryadLinq client components /// + /// The version of the DryadLINQ DLL public Version ClientVersion() { ThrowIfDisposed(); @@ -754,28 +876,6 @@ namespace Microsoft.Research.DryadLinq return _clientVersion; } - /// - /// Version of the DryadLinq server components - /// - public Version ServerVersion() - { - ThrowIfDisposed(); - if (_serverVersion == null) - { - try - { - IServerVersion version = this.GetIScheduler().GetServerVersion(); - _serverVersion = new Version(version.Major, version.Minor, version.Build, version.Revision); - } - catch (Exception ex) - { - throw new DryadLinqException(DryadLinqErrorCode.CouldNotGetServerVersion, - SR.CouldNotGetServerVersion, ex); - } - } - return _serverVersion; - } - internal DryadLinqJobExecutor MakeJobExecutor() { switch (this.ExecutorKind) @@ -791,42 +891,43 @@ namespace Microsoft.Research.DryadLinq } } - public Uri MakeTemporaryStreamUri() + internal Uri MakeTemporaryStreamUri() { if (this._storageSetScheme == null) { throw new DryadLinqException("The storage scheme for temporary streams must be specified."); } - return new Uri(this._tempDatasetDirectory, DryadLinqUtil.MakeUniqueName()); + DataProvider dataProvider = DataProvider.GetDataProvider(this._storageSetScheme); + return dataProvider.GetTemporaryStreamUri(this, DryadLinqUtil.MakeUniqueName()); } /// - /// Open a dataset as a DryadLinq's IQueryable. + /// Open a dataset as a DryadLinq specialized IQueryable{T}. /// /// The type of the records in the table. - /// The name of the dataset. + /// The name of the dataset. /// An IQueryable{T} representing the data. - public IQueryable FromStore(string dataSetName) + public IQueryable FromStore(string dataSetUri) { - return FromStore(new Uri(dataSetName)); + return FromStore(new Uri(dataSetUri)); } /// - /// Open a dataset as a DryadLinq's IQueryable. 
+ /// Open a dataset as a DryadLinq specialized IQueryable{T}. /// /// The type of the records in the table. - /// The name of the dataset. + /// The name of the dataset. /// An IQueryable{T} representing the data. - public IQueryable FromStore(Uri dataSetName) + public IQueryable FromStore(Uri dataSetUri) { ThrowIfDisposed(); - DryadLinqQuery q = DataProvider.GetPartitionedTable(this, dataSetName); + DryadLinqQuery q = DataProvider.GetPartitionedTable(this, dataSetUri); q.CheckAndInitialize(); // force the data-info checks. return q; } /// - /// Converts an IEnumerable{T} to a DryadLinq IQueryable{T}. + /// Converts an IEnumerable{T} to a DryadLinq specialized IQueryable{T}. /// /// The type of the records in the table. /// The source data. @@ -838,11 +939,32 @@ namespace Microsoft.Research.DryadLinq public IQueryable FromEnumerable(IEnumerable data) { Uri dataSetName = this.MakeTemporaryStreamUri(); - CompressionScheme compressionScheme = this.IntermediateDataCompressionScheme; + CompressionScheme compressionScheme = this.OutputDataCompressionScheme; DryadLinqMetaData metadata = new DryadLinqMetaData(this, typeof(T), dataSetName, compressionScheme); return DataProvider.StoreData(this, data, dataSetName, metadata, compressionScheme, true); } + /// + /// Register a named account with the specified storage key, so that key won't need to be specified in Azure blob URIs + /// + /// The name of the storage account + /// The account's key + public void RegisterAzureAccount(string storageAccountName, string storageAccountKey) + { + _azureSubscriptions.AddAccount(storageAccountName, storageAccountKey); + } + + /// + /// Get the key associated with a named account, or null if it is not registered or auto-detected from + /// the subscriptions + /// + /// The name of the storage account + /// The storage key, or null + public string AzureAccountKey(string storageAccountName) + { + return _azureSubscriptions.GetAccountKeyAsync(storageAccountName).Result; + } + internal static 
DryadLinqContext GetContext(IQueryProvider provider) { DryadLinqProviderBase baseProvider = provider as DryadLinqProviderBase; @@ -855,12 +977,6 @@ namespace Microsoft.Research.DryadLinq return context; } - // Return IScheduler reference for internal use - internal IScheduler GetIScheduler() - { - return this._runtime.GetIScheduler(); - } - /// /// Releases all resources used by the DryadLinqContext. /// @@ -869,16 +985,6 @@ namespace Microsoft.Research.DryadLinq if (!_isDisposed) { _isDisposed = true; - if (_runtime != null) - { - _runtime.Dispose(); - _runtime = null; - } - if (_dscService != null) - { - _dscService.Close(); - _dscService = null; - } } } @@ -890,7 +996,12 @@ namespace Microsoft.Research.DryadLinq } } - // This is used to check if a DryadLINQ query is constructed using the same context. + /// + /// Determines whether this instance of DryadLinqContext is equal to another instance + /// of . + /// + /// The other DryadLinqContext instance + /// true if the two instances are equal public virtual bool Equals(DryadLinqContext context) { return (this.IntermediateDataCompressionScheme == context.IntermediateDataCompressionScheme && @@ -899,11 +1010,7 @@ namespace Microsoft.Research.DryadLinq this.DryadHomeDirectory == context.DryadHomeDirectory && this.PeloponneseHomeDirectory == context.PeloponneseHomeDirectory && this.HeadNode == context.HeadNode && - this.DataNameNode == context.DataNameNode && - this.DataNameNodeDataPort == context.DataNameNodeDataPort && - this.DataNameNodeHttpPort == context.DataNameNodeHttpPort && - this.AzureAccountName == context.AzureAccountName && - this.AzureContainerName == context.AzureContainerName && + this.Cluster == context.Cluster && this.PartitionUncPath == context.PartitionUncPath && this.JobMinNodes == context.JobMinNodes && this.JobMaxNodes == context.JobMaxNodes && @@ -912,7 +1019,7 @@ namespace Microsoft.Research.DryadLinq this.EnableSpeculativeDuplication == context.EnableSpeculativeDuplication && 
this.LocalDebug == context.LocalDebug && this.LocalExecution == context.LocalExecution && - this.Platform == context.Platform && + this.PlatformKind == context.PlatformKind && this.JobUsername == context.JobUsername && this.JobPassword == context.JobPassword && this.RuntimeTraceLevel == context.RuntimeTraceLevel && diff --git a/LinqToDryad/DryadLinqDecomposition.cs b/LinqToDryad/DryadLinqDecomposition.cs index 78e1d67..2fc1d8b 100644 --- a/LinqToDryad/DryadLinqDecomposition.cs +++ b/LinqToDryad/DryadLinqDecomposition.cs @@ -26,10 +26,11 @@ using System.Reflection; using System.Linq; using System.Linq.Expressions; using System.Diagnostics; -using Microsoft.Research.DryadLinq.Internal; namespace Microsoft.Research.DryadLinq { + using Microsoft.Research.DryadLinq.Internal; + internal class DecompositionInfo { private Expression m_func; // The original function call @@ -744,6 +745,13 @@ namespace Microsoft.Research.DryadLinq } } } +} + +#pragma warning disable 1591 + +namespace Microsoft.Research.DryadLinq.Internal +{ + using Microsoft.Research.DryadLinq; public class ContainsDecomposition : IDecomposable { diff --git a/LinqToDryad/DryadLinqEnumerable.cs b/LinqToDryad/DryadLinqEnumerable.cs index c4f7f53..f77e870 100644 --- a/LinqToDryad/DryadLinqEnumerable.cs +++ b/LinqToDryad/DryadLinqEnumerable.cs @@ -27,12 +27,15 @@ using System.Reflection; using System.Linq; using Microsoft.Research.DryadLinq.Internal; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { - // This class provides the IEnumerable implementation of the operators - // we introduced in DryadLINQ. This is needed when LocalDebug is set. - // - // Due to the way linq-to-objects locates methods to call, this class must be public visibility. + /// + /// This class provides the IEnumerable implementation of the operators + /// we introduced in DryadLINQ. This is needed to implement LocalDebug. + /// + /// A DryadLINQ user should not need to use this class directly. 
public static class DryadLinqEnumerable { // Operator: HashPartition diff --git a/LinqToDryad/DryadLinqException.cs b/LinqToDryad/DryadLinqException.cs index 23954e9..5b0a8d5 100644 --- a/LinqToDryad/DryadLinqException.cs +++ b/LinqToDryad/DryadLinqException.cs @@ -27,19 +27,36 @@ using Microsoft.Research.DryadLinq.Internal; namespace Microsoft.Research.DryadLinq { + /// + /// The exception that is thrown by DryadLINQ. + /// [Serializable] public class DryadLinqException : Exception { private int m_errorCode; + /// + /// Initializes an instance of DryadLinqException with a specified message. + /// + /// The exception message. public DryadLinqException(string message) : base(message) { } + /// + /// Initializes an instance of DryadLinqException with a specified message and an inner exception. + /// + /// The exception message. + /// The inner exception. public DryadLinqException(string message, Exception inner) : base(message, inner) { } + /// + /// Initializes an instance of DryadLinqException with serialized data. + /// + /// The SerializationInfo that holds the serialized object data about the exception being thrown. + /// The StreamingContext that contains contextual information about the source or destination. protected DryadLinqException(SerializationInfo info, StreamingContext context) : base(info, context) { @@ -49,13 +66,24 @@ namespace Microsoft.Research.DryadLinq } } + /// + /// Initializes an instance of DryadLinqException with a specified error code and message. + /// + /// The error code. + /// The exception message. internal DryadLinqException(int errorCode, string message) : base(message) { this.m_errorCode = errorCode; } - + /// + /// Initializes an instance of DryadLinqException with a specified error code, an exception + /// message, and an inner exception. + /// + /// The error code. + /// The exception message. + /// The inner exception. 
internal DryadLinqException(int errorCode, string message, Exception innerException) : base(message, innerException) { @@ -65,8 +93,18 @@ namespace Microsoft.Research.DryadLinq /// /// Exception's error code. Maps to values in DryadLinqErrorCode. /// - public int ErrorCode { get { return m_errorCode; } } + public int ErrorCode + { + get { return this.m_errorCode; } + } + /// + /// Creates an instance of DryadLinqException. + /// + /// The error code. + /// The error message. + /// The current expression. + /// internal static Exception Create(int errorCode, string msg, Expression expr) { StringBuilder sb = new StringBuilder(); @@ -77,6 +115,11 @@ namespace Microsoft.Research.DryadLinq return new DryadLinqException(errorCode, sb.ToString()); } + /// + /// Sets the SerializationInfo with information about the exception. + /// + /// The SerializationInfo that holds the serialized object data about the exception being thrown. + /// The StreamingContext that contains contextual information about the source or destination. public override void GetObjectData(SerializationInfo info, StreamingContext context) { base.GetObjectData(info, context); diff --git a/LinqToDryad/DryadLinqExtension.cs b/LinqToDryad/DryadLinqExtension.cs index 5fee6aa..1d24bb1 100644 --- a/LinqToDryad/DryadLinqExtension.cs +++ b/LinqToDryad/DryadLinqExtension.cs @@ -31,15 +31,21 @@ using Microsoft.Research.DryadLinq.Internal; namespace Microsoft.Research.DryadLinq { /// - /// This provides some useful classes and operators that are commonly used - /// in applications. The operators are defined using DryadLINQ operators. + /// Represents a key/value pair. Very similar to KeyValuePair, but adds a few more methods. /// + /// The type of the key + /// The type of the value [Serializable] public struct Pair : IEquatable> { private T1 m_key; private T2 m_value; + /// + /// Initializes an instance of this key-value Pair structure. + /// + /// The key of the pair. + /// The value of the pair. 
[FieldMapping("x", "Key")] [FieldMapping("y", "Value")] public Pair(T1 x, T2 y) @@ -48,16 +54,27 @@ namespace Microsoft.Research.DryadLinq this.m_value = y; } + /// + /// Gets the key in the key-value pair. + /// public T1 Key { get { return this.m_key; } } + /// + /// Gets the value in the key-value pair. + /// public T2 Value { get { return this.m_value; } } + /// + /// Indicates whether this instance and a specified object are equal. + /// + /// The object to compare with + /// true iff this instance is equal to a specified object public override bool Equals(Object obj) { if (!(obj is Pair)) return false; @@ -65,37 +82,73 @@ namespace Microsoft.Research.DryadLinq return this.m_key.Equals(pair.Key) && this.m_value.Equals(pair.Value); } + /// + /// Determines whether this instance and another Pair are equal. + /// + /// The other Pair to compare with + /// true iff this instance and the specified Pair are equal public bool Equals(Pair val) { return this.m_key.Equals(val.Key) && this.m_value.Equals(val.Value); } + /// + /// Determines whether two specified Pairs are equal. + /// + /// The first Pair + /// The second Pair + /// true iff two Pairs are equal public static bool Equals(Pair a, Pair b) { return a.Equals(b); } + /// + /// Determines whether two specified Pairs are equal. + /// + /// The left Pair + /// The right Pair + /// true iff two Pairs are equal public static bool operator ==(Pair a, Pair b) { return a.Equals(b); } + /// + /// Determines whether two specified Pairs are not equal. + /// + /// The left Pair + /// The right Pair + /// true iff two Pairs are not equal public static bool operator !=(Pair a, Pair b) { return !a.Equals(b); } + /// + /// Returns the hash code of the current Pair. + /// + /// A 32-bit signed integer. public override int GetHashCode() { return (-1521134295 * this.m_key.GetHashCode()) + this.m_value.GetHashCode(); } + /// + /// Returns a string that represents the current Pair. 
+ /// + /// A string that represents the current Pair. public override string ToString() { return "<" + this.Key + ", " + this.Value + ">"; } } + /// + /// Defines some useful operators that are commonly used in applications. The + /// operators are defined using the basic DryadLINQ operators. This class + /// also shows how a user library can be defined. + /// public static class DryadLinqExtension { /// @@ -202,24 +255,28 @@ namespace Microsoft.Research.DryadLinq /// /// Broadcast a dataset to multiple partitions /// - /// The type of the input records - /// The input dataset - /// The output dataset, which consists of multiple copies of source - public static IQueryable BroadCast(this IQueryable source) + /// The record type of the source + /// The record type of the destination + /// The source dataset to broadcast + /// The destination dataset to receive + /// The output dataset, which consists of multiple copies of source. The number + /// of copies is the number of partitions of destination. + public static IQueryable BroadCast(this IQueryable source, IQueryable destination) { - return source.ApplyPerPartition(source, (x, y) => y, true); + return destination.ApplyPerPartition(source, (x, y) => y, true); } /// /// Broadcast a dataset to n partitions. 
/// - /// The type of the input records - /// The input dataset + /// The record type of the source + /// The source dataset to broadcast + /// The number of copies to broadcast /// The output dataset, each partition of which is a copy of source - public static IQueryable BroadCast(this IQueryable source, int n) + public static IQueryable BroadCast(this IQueryable source, int bcnt) { var dummy = source.ApplyPerPartition(x => DryadLinqHelper.ValueZero(x)) - .HashPartition(x => x, n); + .HashPartition(x => x, bcnt); return dummy.ApplyPerPartition(source, (x, y) => y, true); } diff --git a/LinqToDryad/DryadLinqFactory.cs b/LinqToDryad/DryadLinqFactory.cs index 03d6c76..5db80cc 100644 --- a/LinqToDryad/DryadLinqFactory.cs +++ b/LinqToDryad/DryadLinqFactory.cs @@ -26,13 +26,47 @@ using System.Reflection; using System.Data.SqlTypes; using Microsoft.Research.DryadLinq; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { + /// + /// Represents a factory for making DryadLINQ readers/writers. + /// + /// The type of record + /// A DryadLINQ user should not need to use this class directly. It is public + /// because it is used in the code auto-generated by DryadLINQ. public abstract class DryadLinqFactory { + /// + /// Creates a new reader from a native stream. + /// + /// A native stream. + /// A reader that uses DryadLINQ serialization for records of type T. public abstract DryadLinqRecordReader MakeReader(NativeBlockStream nativeStream); + + /// + /// Creates a new reader from a Dryad vertex handle and a port number. + /// + /// A Dryad vertex handle. + /// A port number + /// A reader that uses DryadLINQ serialization for records of type T. public abstract DryadLinqRecordReader MakeReader(IntPtr handle, UInt32 port); + + /// + /// Creates a new writer from a native stream. + /// + /// A native stream. + /// A writer that uses DryadLINQ serialization for record of type T. 
public abstract DryadLinqRecordWriter MakeWriter(NativeBlockStream nativeStream); + + /// + /// Creates a new writer from a Dryad vertex handle and a port number. + /// + /// A Dryad vertex handle. + /// A port number. + /// The size for write buffer. + /// A writer that uses DryadLINQ serialization for records of type T. public abstract DryadLinqRecordWriter MakeWriter(IntPtr handle, UInt32 port, Int32 buffSize); } diff --git a/LinqToDryad/DryadLinqFaultCodes.cs b/LinqToDryad/DryadLinqFaultCodes.cs index 0c49c2e..0f98dad 100644 --- a/LinqToDryad/DryadLinqFaultCodes.cs +++ b/LinqToDryad/DryadLinqFaultCodes.cs @@ -33,7 +33,7 @@ namespace Microsoft.Research.DryadLinq CodeGen = 0x02000000, JobSubmission = 0x03000000, Serialization = 0x04000000, - DscClient= 0x05000000, + StoreClient= 0x05000000, VertexRuntime = 0x06000000, LocalDebug = 0x07000000, Unknown = 0x0f000000 @@ -46,7 +46,7 @@ namespace Microsoft.Research.DryadLinq /// NOTE: New error codes must be appended to a category /// NOTE: Error codes cannot be deleted /// - public static class DryadLinqErrorCode + internal static class DryadLinqErrorCode { internal const int codesPerCategory = 0x01000000; @@ -104,28 +104,28 @@ namespace Microsoft.Research.DryadLinq #endregion - #region DscClient - public const int DSCStreamError = (int) DryadLinqErrorCodeCategory.DscClient + 0; - public const int StreamDoesNotExist = (int) DryadLinqErrorCodeCategory.DscClient + 1; - public const int StreamAlreadyExists = (int) DryadLinqErrorCodeCategory.DscClient + 2; - public const int AttemptToReadFromAWriteStream = (int) DryadLinqErrorCodeCategory.DscClient + 3; - public const int FailedToCreateStream = (int) DryadLinqErrorCodeCategory.DscClient + 4; - public const int JobToCreateTableWasCanceled = (int) DryadLinqErrorCodeCategory.DscClient + 5; - public const int FailedToGetReadPathsForStream = (int) DryadLinqErrorCodeCategory.DscClient + 6; - public const int CannotAccesFilePath = (int)DryadLinqErrorCodeCategory.DscClient + 7; - 
public const int PositionNotSupported = (int)DryadLinqErrorCodeCategory.DscClient + 8; - public const int GetFileSizeError = (int)DryadLinqErrorCodeCategory.DscClient + 9; - public const int ReadFileError = (int)DryadLinqErrorCodeCategory.DscClient + 10; - public const int UnknownCompressionScheme = (int)DryadLinqErrorCodeCategory.DscClient + 11; - public const int WriteFileError = (int)DryadLinqErrorCodeCategory.DscClient + 12; - public const int MultiBlockEmptyPartitionList = (int)DryadLinqErrorCodeCategory.DscClient + 13; - public const int GetURINotSupported = (int)DryadLinqErrorCodeCategory.DscClient + 14; - public const int SetCalcFPNotSupported = (int)DryadLinqErrorCodeCategory.DscClient + 15; - public const int GetFPNotSupported = (int)DryadLinqErrorCodeCategory.DscClient + 16; - public const int FailedToAllocateNewNativeBuffer = (int)DryadLinqErrorCodeCategory.DscClient + 17; - public const int FailedToReadFromInputChannel = (int)DryadLinqErrorCodeCategory.DscClient + 18; - public const int FailedToWriteToOutputChannel = (int)DryadLinqErrorCodeCategory.DscClient + 19; - public const int MultiBlockCannotAccesFilePath = (int)DryadLinqErrorCodeCategory.DscClient + 25; + #region StoreClient + public const int DSCStreamError = (int) DryadLinqErrorCodeCategory.StoreClient + 0; + public const int StreamDoesNotExist = (int) DryadLinqErrorCodeCategory.StoreClient + 1; + public const int StreamAlreadyExists = (int) DryadLinqErrorCodeCategory.StoreClient + 2; + public const int AttemptToReadFromAWriteStream = (int) DryadLinqErrorCodeCategory.StoreClient + 3; + public const int FailedToCreateStream = (int) DryadLinqErrorCodeCategory.StoreClient + 4; + public const int JobToCreateTableWasCanceled = (int) DryadLinqErrorCodeCategory.StoreClient + 5; + public const int FailedToGetReadPathsForStream = (int) DryadLinqErrorCodeCategory.StoreClient + 6; + public const int CannotAccesFilePath = (int)DryadLinqErrorCodeCategory.StoreClient + 7; + public const int 
PositionNotSupported = (int)DryadLinqErrorCodeCategory.StoreClient + 8; + public const int GetFileSizeError = (int)DryadLinqErrorCodeCategory.StoreClient + 9; + public const int ReadFileError = (int)DryadLinqErrorCodeCategory.StoreClient + 10; + public const int UnknownCompressionScheme = (int)DryadLinqErrorCodeCategory.StoreClient + 11; + public const int WriteFileError = (int)DryadLinqErrorCodeCategory.StoreClient + 12; + public const int MultiBlockEmptyPartitionList = (int)DryadLinqErrorCodeCategory.StoreClient + 13; + public const int GetURINotSupported = (int)DryadLinqErrorCodeCategory.StoreClient + 14; + public const int SetCalcFPNotSupported = (int)DryadLinqErrorCodeCategory.StoreClient + 15; + public const int GetFPNotSupported = (int)DryadLinqErrorCodeCategory.StoreClient + 16; + public const int FailedToAllocateNewNativeBuffer = (int)DryadLinqErrorCodeCategory.StoreClient + 17; + public const int FailedToReadFromInputChannel = (int)DryadLinqErrorCodeCategory.StoreClient + 18; + public const int FailedToWriteToOutputChannel = (int)DryadLinqErrorCodeCategory.StoreClient + 19; + public const int MultiBlockCannotAccesFilePath = (int)DryadLinqErrorCodeCategory.StoreClient + 25; #endregion #region JobSubmission @@ -216,7 +216,7 @@ namespace Microsoft.Research.DryadLinq public const int FailureInOperator = (int)DryadLinqErrorCodeCategory.VertexRuntime + 25; public const int FailureInUserApplyFunction = (int)DryadLinqErrorCodeCategory.VertexRuntime + 26; public const int FailureInOrderedGroupBy = (int)DryadLinqErrorCodeCategory.VertexRuntime + 27; - public const int TooManyElementsBeforeReduction = (int)DryadLinqErrorCodeCategory.VertexRuntime + 33; //@@TODO: when possible, reword the sr.txt entry. 
+ public const int TooManyElementsBeforeReduction = (int)DryadLinqErrorCodeCategory.VertexRuntime + 33; #endregion #region LocalDebug @@ -233,31 +233,38 @@ namespace Microsoft.Research.DryadLinq /// internal static DryadLinqErrorCodeCategory Category(int code) { - if ((code >= (int) DryadLinqErrorCodeCategory.QueryAPI) && (code < (int) DryadLinqErrorCodeCategory.QueryAPI+ codesPerCategory)) + if ((code >= (int)DryadLinqErrorCodeCategory.QueryAPI) && + (code < (int)DryadLinqErrorCodeCategory.QueryAPI+ codesPerCategory)) { return DryadLinqErrorCodeCategory.QueryAPI; } - else if ((code >= (int) DryadLinqErrorCodeCategory.CodeGen) && (code < (int) DryadLinqErrorCodeCategory.CodeGen+ codesPerCategory)) + else if ((code >= (int)DryadLinqErrorCodeCategory.CodeGen) && + (code < (int)DryadLinqErrorCodeCategory.CodeGen+ codesPerCategory)) { return DryadLinqErrorCodeCategory.CodeGen; } - else if ((code >= (int) DryadLinqErrorCodeCategory.JobSubmission) && (code < (int) DryadLinqErrorCodeCategory.JobSubmission+ codesPerCategory)) + else if ((code >= (int)DryadLinqErrorCodeCategory.JobSubmission) && + (code < (int)DryadLinqErrorCodeCategory.JobSubmission+ codesPerCategory)) { return DryadLinqErrorCodeCategory.JobSubmission; } - else if ((code >= (int) DryadLinqErrorCodeCategory.Serialization) && (code < (int) DryadLinqErrorCodeCategory.Serialization+ codesPerCategory)) + else if ((code >= (int)DryadLinqErrorCodeCategory.Serialization) && + (code < (int)DryadLinqErrorCodeCategory.Serialization+ codesPerCategory)) { return DryadLinqErrorCodeCategory.Serialization; } - else if ((code >= (int)DryadLinqErrorCodeCategory.DscClient) && (code < (int)DryadLinqErrorCodeCategory.DscClient+ codesPerCategory)) + else if ((code >= (int)DryadLinqErrorCodeCategory.StoreClient) && + (code < (int)DryadLinqErrorCodeCategory.StoreClient+ codesPerCategory)) { - return DryadLinqErrorCodeCategory.DscClient; + return DryadLinqErrorCodeCategory.StoreClient; } - else if ((code >= 
(int)DryadLinqErrorCodeCategory.VertexRuntime) && (code < (int)DryadLinqErrorCodeCategory.VertexRuntime+ codesPerCategory)) + else if ((code >= (int)DryadLinqErrorCodeCategory.VertexRuntime) && + (code < (int)DryadLinqErrorCodeCategory.VertexRuntime+ codesPerCategory)) { return DryadLinqErrorCodeCategory.VertexRuntime; } - else if ((code >= (int)DryadLinqErrorCodeCategory.LocalDebug) && (code < (int)DryadLinqErrorCodeCategory.LocalDebug+ codesPerCategory)) + else if ((code >= (int)DryadLinqErrorCodeCategory.LocalDebug) && + (code < (int)DryadLinqErrorCodeCategory.LocalDebug+ codesPerCategory)) { return DryadLinqErrorCodeCategory.LocalDebug; } @@ -266,6 +273,5 @@ namespace Microsoft.Research.DryadLinq return DryadLinqErrorCodeCategory.Unknown; } } - } } diff --git a/LinqToDryad/DryadLinqGlobals.cs b/LinqToDryad/DryadLinqGlobals.cs index b8e5196..88a6af4 100644 --- a/LinqToDryad/DryadLinqGlobals.cs +++ b/LinqToDryad/DryadLinqGlobals.cs @@ -163,7 +163,7 @@ namespace Microsoft.Research.DryadLinq } /// - /// Contains references to class&method names that are referenced via reflection. + /// Contains references to class/method/field names that are referenced via reflection. /// This is intended to assist with refactoring that may break reflection. /// /// NOTE: this list will probably never be complete. diff --git a/LinqToDryad/DryadLinqHelper.cs b/LinqToDryad/DryadLinqHelper.cs index c8069b6..0204f9a 100644 --- a/LinqToDryad/DryadLinqHelper.cs +++ b/LinqToDryad/DryadLinqHelper.cs @@ -27,10 +27,26 @@ using System.Reflection; using System.Linq.Expressions; using System.Linq; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { + /// + /// DryadLINQ helper functions. They are mainly used in the implementation of DryadLINQ. + /// + /// A DryadLINQ user should not need to use DryadLinqHelper directly. public static class DryadLinqHelper { + /// + /// Check if an input source is sorted. + /// + /// The type of input record. + /// The type of key. 
+ /// The input source. + /// The key selection function. + /// A comparer object used to compare keys. + /// True if the check is for descending + /// [Resource(IsStateful = false)] public static IEnumerable CheckSort(IEnumerable source, @@ -65,6 +81,16 @@ namespace Microsoft.Research.DryadLinq.Internal } } + /// + /// Apply a function to the cross product of two input sequences. + /// + /// The record type of the first input. + /// The record type of the second input. + /// The record type of the result. + /// The first input. + /// The second input. + /// The function to apply. + /// The result of applying the function to the cross product of two inputs. public static IEnumerable Cross(IEnumerable s1, IEnumerable s2, Expression> procFunc) @@ -104,7 +130,14 @@ namespace Microsoft.Research.DryadLinq.Internal } } - // Used in SequenceEqual() + /// + /// Determines whether two sequences are equal according to an equality comparer + /// + /// The record type of the sequences. + /// The first sequence. + /// The second sequence. + /// An equality comparer. + /// true iff the two sequences are equal. public static IEnumerable SequenceEqual(IEnumerable s1, IEnumerable s2, IEqualityComparer comparer) @@ -114,6 +147,14 @@ namespace Microsoft.Research.DryadLinq.Internal } // Used in SlidingWindow() + /// + /// Returns the last windowSize-1 records in a sequence. + /// + /// The record type of the sequence. + /// The input sequence. + /// The window size + /// The last windowSize-1 records as an array. + /// Used in sliding windows computations. 
[Resource(IsStateful = false)] public static IEnumerable Last(IEnumerable source, int windowSize) diff --git a/LinqToDryad/DryadLinqJobExecutor.cs b/LinqToDryad/DryadLinqJobExecutor.cs index 6d51098..fd45371 100644 --- a/LinqToDryad/DryadLinqJobExecutor.cs +++ b/LinqToDryad/DryadLinqJobExecutor.cs @@ -57,21 +57,13 @@ namespace Microsoft.Research.DryadLinq // use a new job submission object for each query this.m_context = context; this.m_currentStatus = JobStatus.NotSubmitted; - if (context.Runtime is DryadLinqQueryRuntime) + if (context.LocalExecution) { - if (context.LocalExecution) - { - this.m_jobSubmission = new LocalJobSubmission(context); - } - else - { - this.m_jobSubmission = new YarnJobSubmission(context); - } + this.m_jobSubmission = new LocalJobSubmission(context); } else { - throw new DryadLinqException(DryadLinqErrorCode.UnsupportedSchedulerType, - String.Format(SR.UnsupportedSchedulerType, context.Runtime)); + this.m_jobSubmission = new YarnJobSubmission(context); } } @@ -109,7 +101,7 @@ namespace Microsoft.Research.DryadLinq { // Consturct the Graph Manager cmd line. // string queryPlanFileName = Path.GetFileName(queryPlanPath); - this.m_jobSubmission.AddJobOption("cmdline", "DryadLinqGraphManager.exe" + " " + queryPlanPath); + this.m_jobSubmission.AddJobOption("cmdline", "DryadLinqGraphManager.exe" + " " + queryPlanPath); AddResource(this.m_jobSubmission, queryPlanPath); @@ -194,7 +186,7 @@ namespace Microsoft.Research.DryadLinq /// /// True if the background execution has terminated. 
/// - /// + /// true iff the job has terminated public bool Terminated() { // First check whether the status is finalized @@ -262,10 +254,7 @@ namespace Microsoft.Research.DryadLinq /// internal string ErrorMsg { - get - { - return this.m_jobSubmission.ErrorMsg; - } + get { return this.m_jobSubmission.ErrorMsg; } } public string GetJobId() diff --git a/LinqToDryad/DryadLinqLog.cs b/LinqToDryad/DryadLinqLog.cs index 43065ae..3a4c33d 100644 --- a/LinqToDryad/DryadLinqLog.cs +++ b/LinqToDryad/DryadLinqLog.cs @@ -51,7 +51,7 @@ namespace Microsoft.Research.DryadLinq.Internal if (!IsOn || s_IOErrorOccurred) return; try - { + { if (args == null) { s_writer.WriteLine(msg); @@ -77,7 +77,7 @@ namespace Microsoft.Research.DryadLinq.Internal } return; } - catch (System.ObjectDisposedException) + catch (System.ObjectDisposedException) { s_IOErrorOccurred = true; try @@ -89,21 +89,27 @@ namespace Microsoft.Research.DryadLinq.Internal catch { // supress exceptions that occur during cleanup. - } + } return; - } + } } } } namespace Microsoft.Research.DryadLinq { + /// + /// DryadLINQ logging API. + /// public static class DryadLinqLog { + /// + /// Gets and sets the logging level of DryadLINQ. + /// public static int Level = Constants.TraceErrorLevel; private static TextWriter s_writer = Console.Out; - public static void Initialize(int logLevel, string filePath) + internal static void Initialize(int logLevel, string filePath) { Level = logLevel; if (filePath != null) @@ -124,33 +130,58 @@ namespace Microsoft.Research.DryadLinq } catch (ObjectDisposedException) { - // we're in a shutdown scenario, writing the log triggers an error but it's ok, we can ignore it. - // let's do the next best thing instead: write to the console. + // we're in a shutdown scenario, writing the log triggers an error but it's ok, + // we can ignore it. Let's do the next best thing instead: write to the console. Console.WriteLine(prefix + msg, args); } } } + /// + /// Adds a critical-level log entry. 
+ /// + /// The log message as a format string + /// The objects to format public static void AddCritical(string msg, params object[] args) { Add("Critical: ", Constants.TraceCriticalLevel, msg, args); } + /// + /// Adds an error-level log entry. + /// + /// The log message as a format string + /// The objects to format public static void AddError(string msg, params object[] args) { Add("Error: ", Constants.TraceErrorLevel, msg, args); } + /// + /// Adds a warning-level log entry. + /// + /// The log message as a format string + /// The objects to format public static void AddWarning(string msg, params object[] args) { Add("Warning: ", Constants.TraceWarningLevel, msg, args); } + /// + /// Adds an information-level log entry. + /// + /// The log message as a format string + /// The objects to format public static void AddInfo(string msg, params object[] args) { Add("Info: ", Constants.TraceInfoLevel, msg, args); } + /// + /// Adds a verbose-level log entry. + /// + /// The log message as a format string + /// The objects to format public static void AddVerbose(string msg, params object[] args) { Add("Verbose: ", Constants.TraceVerboseLevel, msg, args); diff --git a/LinqToDryad/DryadLinqMetaData.cs b/LinqToDryad/DryadLinqMetaData.cs index a74cfc7..d5720ee 100644 --- a/LinqToDryad/DryadLinqMetaData.cs +++ b/LinqToDryad/DryadLinqMetaData.cs @@ -32,6 +32,9 @@ using System.Text; namespace Microsoft.Research.DryadLinq { + /// + /// The metadata associated with a DryadLINQ dataset. 
+ /// public class DryadLinqMetaData { const int FLAG_ALLOW_NULL_RECORDS = 0x1; diff --git a/LinqToDryad/DryadLinqNative.cs b/LinqToDryad/DryadLinqNative.cs index f6fc279..3cb5fa4 100644 --- a/LinqToDryad/DryadLinqNative.cs +++ b/LinqToDryad/DryadLinqNative.cs @@ -30,7 +30,7 @@ namespace Microsoft.Research.DryadLinq.Internal { internal struct MEMORYSTATUSEX { - public UInt32 dwLength; + public UInt32 dwLength; public UInt32 dwMemoryLoad; public UInt64 ullTotalPhys; public UInt64 ullAvailPhys; diff --git a/LinqToDryad/DryadLinqObjectStore.cs b/LinqToDryad/DryadLinqObjectStore.cs index 52736fd..ccb5d44 100644 --- a/LinqToDryad/DryadLinqObjectStore.cs +++ b/LinqToDryad/DryadLinqObjectStore.cs @@ -29,12 +29,17 @@ using System.Runtime.Serialization.Formatters.Binary; using System.Diagnostics; using Microsoft.Research.DryadLinq; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { - // This class implements an object store that is used to store objects - // needed for remote execution of managed vertex code. All objects put - // in the store must have the .NET Serializable attribute. - // Note: this class is not thread safe + /// + /// This class implements an object store that is used to store objects + /// needed for remote execution of DryadLINQ vertex code. All objects put + /// in the store must have the .NET Serializable attribute. + /// + /// A DryadLINQ user should not need to use this class directly. + /// The methods in this class are not thread safe. 
public sealed class DryadLinqObjectStore { private const string ObjectStoreFileName = "DryadLinqObjectStore.bin"; diff --git a/LinqToDryad/DryadLinqQuery.cs b/LinqToDryad/DryadLinqQuery.cs index 25539bd..3ce81ec 100644 --- a/LinqToDryad/DryadLinqQuery.cs +++ b/LinqToDryad/DryadLinqQuery.cs @@ -434,7 +434,7 @@ namespace Microsoft.Research.DryadLinq return this.m_isDynamic; } } - + internal override DataSetInfo DataSetInfo { get @@ -450,7 +450,7 @@ namespace Microsoft.Research.DryadLinq } this.CheckAndInitialize(); return this.m_dataSetInfo; - } + } } internal void CheckAndInitialize() @@ -481,18 +481,15 @@ namespace Microsoft.Research.DryadLinq { if (this.IsPlainData && this.m_tableEnumerable == null) { - DryadLinqStreamInfo streamInfo = this.DataProvider.GetStreamInfo(this.Context, this.m_dataSourceUri); + DryadLinqStreamInfo + streamInfo = this.DataProvider.GetStreamInfo(this.Context, this.m_dataSourceUri); Int32 parCount = streamInfo.PartitionCount; Int64 estSize = streamInfo.DataSize; this.m_isDynamic = false; // Finally load any stored metadata to check settings, extract compression-setting // and initialize the DataInfo for this Query. It is uri.. have to convert to stream-name. - DryadLinqMetaData meta = null; - if (DataPath.IsDsc(this.m_dataSourceUri)) - { - meta = DryadLinqMetaData.Get(Context, this.m_dataSourceUri); - } + DryadLinqMetaData meta = DryadLinqMetaData.Get(Context, this.m_dataSourceUri); if (meta != null) { //check the record-type matches meta-data. 
(disabled until final API is determined) @@ -510,10 +507,10 @@ namespace Microsoft.Research.DryadLinq // StaticConfig.AllowNullRecords != meta.AllowNullRecords) //{ // DryadLinqClientLog.Add("Warning: Table was generated with AllowNullFields=" + - // meta.AllowNullFields + - // ", AllowNullRecords=" + meta.AllowNullRecords + - // ", and AllowNullArrayElements=" + - // meta.AllowNullArrayElements); + // meta.AllowNullFields + + // ", AllowNullRecords=" + meta.AllowNullRecords + + // ", and AllowNullArrayElements=" + + // meta.AllowNullArrayElements); //} } @@ -523,7 +520,8 @@ namespace Microsoft.Research.DryadLinq DistinctInfo dinfo = DataSetInfo.NoDistinct; this.m_dataSetInfo = new DataSetInfo(pinfo, oinfo, dinfo); - this.m_tableEnumerable = new DryadLinqQueryEnumerable(this.DataProvider, this.Context, this.m_dataSourceUri); + this.m_tableEnumerable + = new DryadLinqQueryEnumerable(this.Context, this.DataProvider, this.m_dataSourceUri); // YY: query expression and provider are at least set consistently if (Context.LocalDebug) @@ -587,12 +585,14 @@ namespace Microsoft.Research.DryadLinq internal class DryadLinqQueryEnumerable : IEnumerable, IEnumerable { private DryadLinqContext m_context; - private Stream m_stream; + private DataProvider m_dataProvider; + private Uri m_dataSetUri; - public DryadLinqQueryEnumerable(DataProvider dataProvider, DryadLinqContext context, Uri dataSetUri) + public DryadLinqQueryEnumerable(DryadLinqContext context, DataProvider dataProvider, Uri dataSetUri) { this.m_context = context; - this.m_stream = dataProvider.Egress(context, dataSetUri); + this.m_dataProvider = dataProvider; + this.m_dataSetUri = dataSetUri; } IEnumerator IEnumerable.GetEnumerator() @@ -602,7 +602,7 @@ namespace Microsoft.Research.DryadLinq public IEnumerator GetEnumerator() { - return new TableEnumerator(this.m_context, this.m_stream); + return new TableEnumerator(this.m_context, this.m_dataProvider, this.m_dataSetUri); } // Internal enumerator class @@ -612,18 
+612,19 @@ namespace Microsoft.Research.DryadLinq private DryadLinqFactory m_factory; private DryadLinqRecordReader m_reader; - internal TableEnumerator(DryadLinqContext context, Stream stream) + internal TableEnumerator(DryadLinqContext context, DataProvider dataProvider, Uri dataSetUri) { this.m_current = default(T); this.m_factory = (DryadLinqFactory)DryadLinqCodeGen.GetFactory(context, typeof(T)); + Stream stream = dataProvider.Egress(context, dataSetUri); DryadLinqBlockStream nativeStream = new DryadLinqBlockStream(stream); this.m_reader = this.m_factory.MakeReader(nativeStream); - // this.m_reader.StartWorker(); + this.m_reader.StartWorker(); } public bool MoveNext() { - return this.m_reader.ReadRecordSync(ref this.m_current); + return this.m_reader.ReadRecordAsync(ref this.m_current); } object IEnumerator.Current diff --git a/LinqToDryad/DryadLinqQueryGen.cs b/LinqToDryad/DryadLinqQueryGen.cs index adb8dcb..7522582 100644 --- a/LinqToDryad/DryadLinqQueryGen.cs +++ b/LinqToDryad/DryadLinqQueryGen.cs @@ -60,7 +60,6 @@ namespace Microsoft.Research.DryadLinq private DLinqQueryNode[] m_queryPlan3; private Uri[] m_outputTableUris; private bool[] m_isTempOutput; - private Uri[] m_outputDatapaths; private Type[] m_outputTypes; private QueryNodeInfo[] m_queryNodeInfos; private DryadLinqQuery[] m_outputTables; @@ -82,8 +81,7 @@ namespace Microsoft.Research.DryadLinq bool isTempOutput) { this.m_queryExprs = new Expression[] { queryExpr }; - Uri fullTableUri = tableUri; - this.m_outputTableUris = new Uri[] { fullTableUri }; + this.m_outputTableUris = new Uri[] { tableUri }; this.m_isTempOutput = new bool[] { isTempOutput }; this.m_context = context; this.Initialize(vertexCodeGen); @@ -137,7 +135,6 @@ namespace Microsoft.Research.DryadLinq // Initialize the data structures for the output tables this.m_outputTypes = new Type[this.m_queryExprs.Length]; - this.m_outputDatapaths = new Uri[this.m_queryExprs.Length]; this.m_queryNodeInfos = new 
QueryNodeInfo[this.m_queryExprs.Length]; for (int i = 0; i < this.m_queryExprs.Length; i++) @@ -145,9 +142,7 @@ namespace Microsoft.Research.DryadLinq this.m_queryNodeInfos[i] = this.BuildNodeInfoGraph(this.m_queryExprs[i]); this.m_queryNodeInfos[i] = new DummyQueryNodeInfo(this.m_queryExprs[i], false, this.m_queryNodeInfos[i]); - this.m_outputDatapaths[i] = this.m_outputTableUris[i]; - - if (!DataPath.IsValidDataPath(this.m_outputDatapaths[i])) + if (!DataPath.IsValidDataPath(this.m_outputTableUris[i])) { throw new DryadLinqException(DryadLinqErrorCode.UnrecognizedDataSource, String.Format(SR.UnrecognizedDataSource, @@ -303,7 +298,7 @@ namespace Microsoft.Research.DryadLinq for (int i = 0; i < this.m_queryExprs.Length; i++) { - DryadLinqClientLog.Add("Query " + i + " Output: " + this.m_outputDatapaths[i]); + DryadLinqClientLog.Add("Query " + i + " Output: " + this.m_outputTableUris[i]); DLinqQueryNode queryNode = this.m_queryPlan1[i]; if (TypeSystem.IsAnonymousType(queryNode.OutputTypes[0])) @@ -334,7 +329,7 @@ namespace Microsoft.Research.DryadLinq // Add the output node CompressionScheme outputScheme = this.m_context.OutputDataCompressionScheme; DLinqOutputNode outputNode = new DLinqOutputNode(this.m_context, - this.m_outputDatapaths[i], + this.m_outputTableUris[i], this.m_isTempOutput[i], outputScheme, this.m_queryExprs[i], @@ -342,13 +337,14 @@ namespace Microsoft.Research.DryadLinq this.m_queryPlan1[i] = outputNode; - if (this.m_outputUriMap.ContainsKey(this.m_outputDatapaths[i].AbsoluteUri.ToLower())) + string outputUri = this.m_outputTableUris[i].AbsoluteUri.ToLower(); + if (this.m_outputUriMap.ContainsKey(outputUri)) { throw new DryadLinqException(DryadLinqErrorCode.MultipleOutputsWithSameDscUri, - String.Format(SR.MultipleOutputsWithSameUri, this.m_outputDatapaths[i])); + String.Format(SR.MultipleOutputsWithSameUri, this.m_outputTableUris[i])); } - this.m_outputUriMap.Add(this.m_outputDatapaths[i].AbsoluteUri.ToLower(), outputNode); + 
this.m_outputUriMap.Add(outputUri, outputNode); this.m_outputTypes[i] = this.m_queryPlan1[i].OutputTypes[0]; // Remove useless Tees to make Dryad happy @@ -693,7 +689,7 @@ namespace Microsoft.Research.DryadLinq /// Document holding the xml plan. /// Parent node. /// Resource to add. - /// Handle to the inserted node. + /// The resources should be excluded. private void AddResourceToPlan(XmlDocument queryDoc, XmlElement parent, string resource, @@ -1139,9 +1135,10 @@ namespace Microsoft.Research.DryadLinq else if (expression.NodeType == ExpressionType.Constant) { DLinqInputNode inputNode = new DLinqInputNode(this, (ConstantExpression)expression); - if (!this.m_inputUriMap.ContainsKey(inputNode.Table.DataSourceUri.AbsoluteUri.ToLower())) + string inputUri = inputNode.Table.DataSourceUri.AbsoluteUri.ToLower(); + if (!this.m_inputUriMap.ContainsKey(inputUri)) { - this.m_inputUriMap.Add(inputNode.Table.DataSourceUri.AbsoluteUri.ToLower(), inputNode); + this.m_inputUriMap.Add(inputUri, inputNode); } return inputNode; } diff --git a/LinqToDryad/DryadLinqQueryNode.cs b/LinqToDryad/DryadLinqQueryNode.cs index 43c5c13..53a474a 100644 --- a/LinqToDryad/DryadLinqQueryNode.cs +++ b/LinqToDryad/DryadLinqQueryNode.cs @@ -864,7 +864,7 @@ namespace Microsoft.Research.DryadLinq } if (!queryGen.Context.Equals(this.m_table.Context)) { - throw new DryadLinqException("DryadLinqContext was changed while constructing this query."); + throw new DryadLinqException("This query was constructed using different DryadLinqContexts."); } if (TypeSystem.IsTypeOrAnyGenericParamsAnonymous(queryExpr.Type.GetGenericArguments()[0])) { @@ -887,7 +887,6 @@ namespace Microsoft.Research.DryadLinq } } - //@@TODO[P2]: rename to Query. Also look for other places. internal DryadLinqQuery Table { get { return this.m_table; } @@ -4807,6 +4806,7 @@ namespace Microsoft.Research.DryadLinq /// /// Query generator to instantiate. /// Type of the single output. + /// The number of partitions. 
/// The upstream nodes internal DLinqDummyNode(DryadLinqQueryGen queryGen, Type outputType, diff --git a/LinqToDryad/DryadLinqQueryRuntime.cs b/LinqToDryad/DryadLinqQueryRuntime.cs index 07d89fa..c389037 100644 --- a/LinqToDryad/DryadLinqQueryRuntime.cs +++ b/LinqToDryad/DryadLinqQueryRuntime.cs @@ -26,6 +26,11 @@ using System.Text; namespace Microsoft.Research.DryadLinq { + /// + /// Represents the current state of a set of DryadLINQ jobs that have already been + /// submitted for execution. A DryadLinqJobInfo object is returned after a job is + /// submitted for execution. + /// public sealed class DryadLinqJobInfo { internal const string JOBID_NOJOB = "NoJob"; @@ -52,16 +57,22 @@ namespace Microsoft.Research.DryadLinq _jobExecutors = Array.AsReadOnly(jobExecutors); } + /// + /// Gets the job ids of the DryadLINQ jobs. + /// public ReadOnlyCollection JobIds { get { return _jobIds; } } - public ReadOnlyCollection HeadNodes + internal ReadOnlyCollection HeadNodes { get { return _headNodes; } } + /// + /// Blocks until all the DryadLINQ jobs terminate. + /// public void Wait() { foreach (var jobExecutor in _jobExecutors) @@ -82,6 +93,9 @@ namespace Microsoft.Research.DryadLinq } } + /// + /// Cancels all the unfinished jobs. + /// public void CancelJob() { foreach (var jobExecutor in _jobExecutors) @@ -93,40 +107,4 @@ namespace Microsoft.Research.DryadLinq } } } - - /// - /// Represents a connection to a cluster service that can execute DryadLinq jobs. - /// - /// - /// A DryadLinqQueryRuntime holds an IScheduler object that is used to submit - /// a DryadLINQ job. 
- /// - internal sealed class DryadLinqQueryRuntime : IDisposable - { - private string m_headNode; - private IScheduler m_scheduler; - - public DryadLinqQueryRuntime(string headNode) - { - this.m_headNode = headNode; - this.m_scheduler = new YarnScheduler(); - this.m_scheduler.Connect(m_headNode); - } - - public string HostName - { - get { return this.m_headNode; } - } - - public void Dispose() - { - this.m_scheduler.Dispose(); - } - - // Return IScheduler reference for internal use - internal IScheduler GetIScheduler() - { - return this.m_scheduler; - } - } } diff --git a/LinqToDryad/DryadLinqQueryable.cs b/LinqToDryad/DryadLinqQueryable.cs index 34c7b64..9af4726 100644 --- a/LinqToDryad/DryadLinqQueryable.cs +++ b/LinqToDryad/DryadLinqQueryable.cs @@ -31,10 +31,11 @@ using Microsoft.Research.DryadLinq.Internal; namespace Microsoft.Research.DryadLinq { - // This class introduces some new operators into the expression tree. So far, - // there are two classes of new operators: - // 1. HashPartition, RangePartition, Merge - // 2. Apply, ApplyPerPartition + /// + /// This class extends LINQ with a set of new operators that are specific to DryadLINQ. + /// The new operators includes partitioning operators (HashPartition and RangePartition) + /// and the Apply operator that enables stateful transformations on datasets. + /// public static class DryadLinqQueryable { internal static bool IsLocalDebugSource(IQueryable source) @@ -42,6 +43,14 @@ namespace Microsoft.Research.DryadLinq return !(source.Provider is DryadLinqProvider); } + /// + /// Filters a sequence of values based on a predicate. Each element's index is used in + /// the logic of the predicate function. + /// + /// The type of the elements of source + /// The sequence of elements to filter + /// The filter predicate. 
+ /// The elements in the input that satisfy the predicate public static IQueryable LongWhere(this IQueryable source, Expression> predicate) @@ -68,6 +77,16 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Transforms each element of a sequence into a new form by applying a function of + /// the element and its index. + /// + /// The type of the elements of source + /// The type of the elements of the result + /// The sequence of input elements + /// A transform function to apply to each source element; + /// the second parameter of the function represents the index of the source element. + /// The sequence resulting from applying the transformation function on each input element public static IQueryable LongSelect(this IQueryable source, Expression> selector) @@ -94,6 +113,17 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Transforms each element of a sequence into an IEnumerable{T} by applying a function to the + /// element and its index, and then flattens the resulting sequences into one sequence. + /// + /// The type of the elements of source + /// The type of the elements of the result + /// The sequence of input elements + /// A transform function to apply to each source element; + /// the second parameter of the function represents the index of the source element. + /// The sequence resulting from applying the function on each input element and + /// flattening the results public static IQueryable LongSelectMany(this IQueryable source, Expression>> selector) @@ -120,6 +150,19 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Transforms each element of a sequence into an IEnumerable{T} by applying a function to the + /// element and its index, and then flattens the resulting sequences into one sequence. 
+ /// + /// The type of the elements of source + /// The type of the element in the intermediate IEnumerable sequences + /// The type of the elements of the result + /// The sequence of input elements + /// A transform function to apply to each source element; + /// the second parameter of the function represents the index of the source element. + /// A transformation function to apply to each intermediate element + /// The sequence resulting from applying selector to each input element and + /// flattening and transforming the elements in the intermediate sequences public static IQueryable LongSelectMany(this IQueryable source, Expression>> selector, @@ -151,6 +194,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Returns the largest prefix of a sequence such that the elements satisfy a specified predicate. + /// + /// The element type of the input sequence + /// The input sequence + /// A predicate to test each element for a condition + /// The largest prefix satisfying the predicate public static IQueryable LongTakeWhile(this IQueryable source, Expression> predicate) @@ -177,6 +227,14 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Skips elements in a sequence as long as a specified condition is true and then returns the + /// remaining elements. The predicate is a function of an element and its index. 
+ /// + /// The element type of the input sequence + /// The input sequence + /// A predicate to test each element for a condition + /// The remaining sequence by skipping the elements in the head that satisfy the predicate public static IQueryable LongSkipWhile(this IQueryable source, Expression> predicate) @@ -212,7 +270,7 @@ namespace Microsoft.Research.DryadLinq /// The function to extract the key from a record /// An EqualityComparer on TKey to compare keys /// The number of partitions to create - /// An IQueryable partitioned according to a key + /// An IQueryable hash-partitioned according to a key public static IQueryable HashPartition(this IQueryable source, Expression> keySelector, @@ -255,7 +313,7 @@ namespace Microsoft.Research.DryadLinq /// the dataset to be partitioned /// The funtion to extract the key from a record /// The number of partitioned to create - /// An IQueryable partitioned according to a key + /// An IQueryable hash-partitioned according to a key public static IQueryable HashPartition(this IQueryable source, Expression> keySelector, @@ -295,7 +353,7 @@ namespace Microsoft.Research.DryadLinq /// The type of the key on which the partition is based /// the dataset to be partitioned /// The function to extract the key from a record - /// An IQueryable partitioned according to a key + /// An IQueryable hash-partitioned according to a key public static IQueryable HashPartition(this IQueryable source, Expression> keySelector) @@ -330,7 +388,7 @@ namespace Microsoft.Research.DryadLinq /// The dataset to be partitioned /// The function to extract the key from a record /// An IComparer on TKey to compare keys - /// An IQueryable partitioned according to a key + /// An IQueryable hash-partitioned according to a key public static IQueryable HashPartition(this IQueryable source, Expression> keySelector, @@ -364,10 +422,11 @@ namespace Microsoft.Research.DryadLinq /// /// The type of the records in the dataset /// The type of the key on which the 
partition is based + /// The type of the records in the result dataset /// The dataset to be partitioned /// The function to extract the key from a record /// The function to compute output record - /// An IQueryable partitioned according to a key + /// An IQueryable hash-partitioned according to a key public static IQueryable HashPartition(this IQueryable source, Expression> keySelector, @@ -406,11 +465,12 @@ namespace Microsoft.Research.DryadLinq /// /// The type of the records in the dataset /// The type of the key on which the partition is based + /// The type of the records in the result dataset /// The dataset to be partitioned /// The function to extract the key from a record /// An IComparer on TKey to compare keys /// The function to compute output record - /// An IQueryable partitioned according to a key + /// An IQueryable hash-partitioned according to a key public static IQueryable HashPartition(this IQueryable source, Expression> keySelector, @@ -453,7 +513,7 @@ namespace Microsoft.Research.DryadLinq /// The type of the key on which the partition is based /// The dataset to be partitioned /// The function to extract the key from a record - /// An IQueryable partitioned according to a key + /// An IQueryable partitioned according to a list of range keys determined at runtime public static IQueryable RangePartition(this IQueryable source, Expression> keySelector) @@ -487,8 +547,8 @@ namespace Microsoft.Research.DryadLinq /// The type of the key on which the partition is based /// The dataset to be partitioned /// The function to extract the key from a record - /// Number of partitions in the output dataset - /// An IQueryable partitioned according to a key + /// The number of partitions in the output dataset + /// An IQueryable partitioned according to a list of range keys determined at runtime public static IQueryable RangePartition(this IQueryable source, Expression> keySelector, @@ -529,7 +589,7 @@ namespace Microsoft.Research.DryadLinq /// The dataset to be partitioned /// The
function to extract the key from a record /// true if the partition keys are descending - /// An IQueryable partitioned according to a key + /// An IQueryable partitioned according to a list of range keys determined at runtime public static IQueryable RangePartition(this IQueryable source, Expression> keySelector, @@ -557,6 +617,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Range partition a dataset using an array of partition keys. + /// + /// The type of the records in the dataset + /// The type of the key on which the partition is based + /// The dataset to be partitioned + /// The function to extract the key from a record + /// An array of partition keys, either in ascending or descending order + /// An IQueryable partitioned according to the specified range keys public static IQueryable RangePartition(this IQueryable source, Expression> keySelector, @@ -604,7 +673,7 @@ namespace Microsoft.Research.DryadLinq /// The function to extract the key from a record /// true if the partition keys are descending /// Number of partitions in the output dataset - /// An IQueryable partitioned according to a key + /// An IQueryable partitioned according to a list of keys determined at runtime public static IQueryable RangePartition(this IQueryable source, Expression> keySelector, @@ -638,6 +707,16 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Range partition a dataset. The list of range keys are determined dynamically at runtime.
+ /// + /// The type of the records in the input dataset + /// The type of the key on which the partition is based + /// The input dataset to be partitioned + /// The function to extract the key from a record + /// An IComparer on TKey to compare keys + /// true if the generated keys must be descending; otherwise ascending + /// An IQueryable partitioned according to a list of keys determined at runtime public static IQueryable RangePartition(this IQueryable source, Expression> keySelector, @@ -667,6 +746,16 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Range partition a dataset using a specified list of keys. + /// + /// The type of the records in the input dataset + /// The type of the key on which the partition is based + /// The input dataset to be partitioned + /// The function to extract the key from a record + /// The list of range keys + /// An IComparer on TKey to compare keys + /// An IQueryable partitioned according to a specified list of keys. public static IQueryable RangePartition(this IQueryable source, Expression> keySelector, @@ -713,6 +802,17 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Range partition a dataset. The list of range keys are determined dynamically at runtime. + /// + /// The type of the records in the input dataset + /// The type of the key on which the partition is based + /// The input dataset to be partitioned + /// The function to extract the key from a record + /// An IComparer on TKey to compare keys + /// true if the generated keys must be descending; otherwise ascending + /// The number of partitions in the output dataset + /// An IQueryable partitioned according to a list of keys determined at runtime public static IQueryable RangePartition(this IQueryable source, Expression> keySelector, @@ -749,6 +849,17 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Range partition a dataset using a specified list of keys. 
+ /// + /// The type of the records in the input dataset + /// The type of the key on which the partition is based + /// The input dataset to be partitioned + /// The function to extract the key from a record + /// The list of range keys + /// An IComparer on TKey to compare keys + /// true if the keys must be in descending order; otherwise false + /// An IQueryable partitioned according to a specified list of keys public static IQueryable RangePartition(this IQueryable source, Expression> keySelector, @@ -813,7 +924,7 @@ namespace Microsoft.Research.DryadLinq /// The type of the records of the input dataset /// The type of the records of the output dataset /// The input dataset - /// The function to be applied to the input dataset + /// The function to be applied to the input dataset /// The result of computing applyFunc(source) public static IQueryable Apply(this IQueryable source, @@ -845,7 +956,7 @@ namespace Microsoft.Research.DryadLinq /// The type of the records of the output dataset /// The first input dataset /// The second input dataset - /// The function to be applied to the input datasets + /// The function to be applied to the input datasets /// The result of computing applyFunc(source1, source2) public static IQueryable Apply(this IQueryable source1, @@ -967,7 +1078,7 @@ namespace Microsoft.Research.DryadLinq /// The type of the records of the input dataset /// The type of the records of the output dataset /// The input dataset - /// The function to be applied to the input dataset + /// The function to be applied to the input dataset /// The result of computing applyFunc(source) public static IQueryable ApplyPerPartition( @@ -1000,7 +1111,7 @@ namespace Microsoft.Research.DryadLinq /// The type of the records of the output dataset /// The first input dataset /// The second input dataset - /// The function to be applied to the input datasets + /// The function to be applied to the input datasets /// True if only distributive over the first dataset ///
The result of computing applyFunc(source1, source2) public static IQueryable @@ -1043,7 +1154,7 @@ namespace Microsoft.Research.DryadLinq /// Other input datasets /// The function to be applied to the input datasets /// True if only distributive over the first dataset - /// The result of computing applyFunc(source,pieces) + /// The result of computing applyFunc(source, otherSources) public static IQueryable ApplyPerPartition( this IQueryable source, @@ -1165,7 +1276,7 @@ namespace Microsoft.Research.DryadLinq /// The input dataset /// The code body of the DoWhile loop /// The termination condition of the DoWhile loop - /// The output dataset + /// The output dataset when the loop terminates public static IQueryable DoWhile(this IQueryable source, Func, IQueryable> body, @@ -1202,7 +1313,7 @@ namespace Microsoft.Research.DryadLinq /// The input dataset /// The function to apply to every sliding window /// The size of the window - /// + /// An IQueryable formed by the results for each sliding window public static IQueryable SlidingWindow(this IQueryable source, Expression, T2>> procFunc, @@ -1231,6 +1342,16 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Computes a user-defined function on each partition of the input. The function takes a + /// partition and its partition index as arguments. + /// + /// The type of the input records + /// The type of the output records + /// The input dataset + /// The function to apply to each partition + /// An IQueryable formed by concatenating the results of applying the function + /// to each partition public static IQueryable ApplyWithPartitionIndex(this IQueryable source, Expression, int, IEnumerable>> procFunc) @@ -1253,6 +1374,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source. 
+ /// The input sequence + /// true iff the input sequence contains at least one element public static IQueryable AnyAsQuery(this IQueryable source) { if (source == null) @@ -1272,6 +1400,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source. + /// The input sequence + /// A predicate to test each element for a condition. + /// true iff the input sequence contains at least one element that satisfies + /// the predicate public static IQueryable AnyAsQuery(this IQueryable source, Expression> predicate) { @@ -1296,6 +1433,14 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source. + /// The input sequence + /// A predicate to test each element for a condition + /// true iff every element in the input sequence satisfies the predicate public static IQueryable AllAsQuery(this IQueryable source, Expression> predicate) { @@ -1320,6 +1465,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input sequence + /// The number of elements in the input sequence public static IQueryable CountAsQuery(this IQueryable source) { if (source == null) @@ -1339,6 +1491,14 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input sequence + /// A predicate to test each element for a condition + /// The number of elements in the input sequence satisfying the predicate public static IQueryable CountAsQuery(this IQueryable source, Expression> predicate) { @@ -1363,6 +1523,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. 
+ /// + /// The type of the elements of source + /// The input sequence + /// The number of elements in the input sequence public static IQueryable LongCountAsQuery(this IQueryable source) { if (source == null) @@ -1382,6 +1549,14 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input sequence + /// A predicate to test each element for a condition + /// The number of elements in the input sequence satisfying the predicate public static IQueryable LongCountAsQuery(this IQueryable source, Expression> predicate) { @@ -1406,6 +1581,14 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input sequence + /// The value to locate in the sequence + /// true iff the source sequence contains an element of the specified value public static IQueryable ContainsAsQuery(this IQueryable source, TSource item) { @@ -1426,6 +1609,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input sequence + /// The value to locate in the sequence + /// The equality comparer to use + /// true iff the source sequence contains an element of the specified value public static IQueryable ContainsAsQuery(this IQueryable source, TSource item, @@ -1456,6 +1648,14 @@ namespace Microsoft.Research.DryadLinq return Expression.Constant(source.ToArray(), typeof(TSource[])); } + /// + /// Same as , but returns an <> + /// containing a single element. 
+ /// + /// The type of the elements of source + /// The first input sequence + /// The second input sequence + /// true iff the two input sequences are equal public static IQueryable SequenceEqualAsQuery(this IQueryable source1, IEnumerable source2) @@ -1480,7 +1680,16 @@ namespace Microsoft.Research.DryadLinq new Expression[] { source1.Expression, GetSourceExpression(source2) } )); } - + + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The first input sequence + /// The second input sequence + /// The equality comparer to use + /// true iff the two input sequences are equal public static IQueryable SequenceEqualAsQuery(this IQueryable source1, IEnumerable source2, @@ -1510,7 +1719,15 @@ namespace Microsoft.Research.DryadLinq } )); } - + + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input sequence + /// The first element in the input sequence + /// The input sequence is empty public static IQueryable FirstAsQuery(this IQueryable source) { if (source == null) @@ -1530,6 +1747,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input sequence + /// A predicate to test each element for a condition + /// The first element in the input sequence that satisfies the predicate + /// No element in the input sequence satisfies the predicate public static IQueryable FirstAsQuery(this IQueryable source, Expression> predicate) @@ -1555,6 +1781,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. 
+ /// + /// The type of the elements of source + /// The input sequence + /// The last element in the input sequence public static IQueryable LastAsQuery(this IQueryable source) { if (source == null) @@ -1574,6 +1807,14 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input sequence + /// A predicate to test each element for a condition + /// The last element in the input sequence that satisfies the predicate public static IQueryable LastAsQuery(this IQueryable source, Expression> predicate) @@ -1599,6 +1840,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input sequence + /// The single element of the input sequence public static IQueryable SingleAsQuery(this IQueryable source) { if (source == null) @@ -1618,9 +1866,17 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input sequence + /// A predicate to test each element for a condition + /// The single element of the input sequence that satisfies the predicate public static IQueryable SingleAsQuery(this IQueryable source, - Expression> predicate) + Expression> predicate) { if (source == null) { @@ -1643,6 +1899,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input dataset + /// The minimum value in the input dataset public static IQueryable MinAsQuery(this IQueryable source) { if (source == null) @@ -1662,8 +1925,18 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. 
+ /// + /// The type of the elements of source + /// The type of the result value + /// The input dataset + /// A transformation function to apply to each element + /// The minimum value in the transformed values public static IQueryable - MinAsQuery(this IQueryable source, Expression> selector) + MinAsQuery(this IQueryable source, + Expression> selector) { if (source == null) { @@ -1686,6 +1959,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input dataset + /// The maximum value in the input dataset public static IQueryable MaxAsQuery(this IQueryable source) { if (source == null) @@ -1705,9 +1985,18 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The type of the result value + /// The input dataset + /// A transformation function to apply to each element + /// The maximum value in the transformed values public static IQueryable - MaxAsQuery(this IQueryable source, - Expression> selector) + MaxAsQuery(this IQueryable source, + Expression> selector) { if (source == null) { @@ -1730,6 +2019,12 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of Int32 values. + /// + /// A dataset of Int32 values to calculate the sum of + /// The sum of the values in the dataset public static IQueryable SumAsQuery(this IQueryable source) { if (source == null) @@ -1749,6 +2044,12 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the sum of a set of nullable Int32 values. 
+ /// + /// A dataset of nullable Int32 values to calculate the sum of + /// The sum of the values in the dataset public static IQueryable SumAsQuery(this IQueryable source) { if (source == null) @@ -1768,6 +2069,12 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of Int64 values. + /// + /// A dataset of Int64 values to calculate the sum of + /// The sum of the values in the dataset public static IQueryable SumAsQuery(this IQueryable source) { if (source == null) @@ -1787,6 +2094,12 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the sum of a set of nullable Int64 values. + /// + /// A dataset of nullable Int64 values to calculate the sum of + /// The sum of the values in the dataset public static IQueryable SumAsQuery(this IQueryable source) { if (source == null) @@ -1806,6 +2119,12 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of float values. + /// + /// A dataset of float values to calculate the sum of + /// The sum of the values in the dataset public static IQueryable SumAsQuery(this IQueryable source) { if (source == null) @@ -1825,6 +2144,12 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the sum of a set of nullable float values. + /// + /// A dataset of nullable float values to calculate the sum of + /// The sum of the values in the dataset public static IQueryable SumAsQuery(this IQueryable source) { if (source == null) @@ -1844,6 +2169,12 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of double values. 
+ /// + /// A dataset of double values to calculate the sum of + /// The sum of the values in the dataset public static IQueryable SumAsQuery(this IQueryable source) { if (source == null) @@ -1863,6 +2194,12 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the sum of a set of nullable double values. + /// + /// A dataset of nullable double values to calculate the sum of + /// The sum of the values in the dataset public static IQueryable SumAsQuery(this IQueryable source) { if (source == null) @@ -1882,6 +2219,12 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of decimal values. + /// + /// A dataset of decimal values to calculate the sum of + /// The sum of the values in the dataset public static IQueryable SumAsQuery(this IQueryable source) { if (source == null) @@ -1901,6 +2244,12 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the sum of a set of nullable decimal values. + /// + /// A dataset of nullable decimal values to calculate the sum of + /// The sum of the values in the dataset public static IQueryable SumAsQuery(this IQueryable source) { if (source == null) @@ -1920,9 +2269,18 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of Int32 values that + /// is obtained by applying a function to each element of the input dataset. 
+ /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The sum of the values after applying the transformation function public static IQueryable SumAsQuery(this IQueryable source, - Expression> selector) + Expression> selector) { if (source == null) { @@ -1945,6 +2303,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the sum of a set of nullable Int32 values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The sum of the values after applying the transformation function public static IQueryable SumAsQuery(this IQueryable source, Expression> selector) @@ -1970,9 +2337,18 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of Int64 values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The sum of the values after applying the transformation function public static IQueryable SumAsQuery(this IQueryable source, - Expression> selector) + Expression> selector) { if (source == null) { @@ -1995,6 +2371,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of nullable Int64 values that + /// is obtained by applying a function to each element of the input dataset. 
+ /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The sum of the values after applying the transformation function public static IQueryable SumAsQuery(this IQueryable source, Expression> selector) @@ -2020,9 +2405,18 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of float values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The sum of the values after applying the transformation function public static IQueryable SumAsQuery(this IQueryable source, - Expression> selector) + Expression> selector) { if (source == null) { @@ -2045,6 +2439,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of nullable float values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The sum of the values after applying the transformation function public static IQueryable SumAsQuery(this IQueryable source, Expression> selector) @@ -2070,9 +2473,18 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of double values that + /// is obtained by applying a function to each element of the input dataset. 
+ /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The sum of the values after applying the transformation function public static IQueryable SumAsQuery(this IQueryable source, - Expression> selector) + Expression> selector) { if (source == null) { @@ -2095,6 +2507,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of nullable double values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The sum of the values after applying the transformation function public static IQueryable SumAsQuery(this IQueryable source, Expression> selector) @@ -2120,9 +2541,18 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of decimal values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The sum of the values after applying the transformation function public static IQueryable SumAsQuery(this IQueryable source, - Expression> selector) + Expression> selector) { if (source == null) { @@ -2145,9 +2575,18 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the sum of a set of nullable decimal values that + /// is obtained by applying a function to each element of the input dataset. 
+ /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The sum of the values after applying the transformation function public static IQueryable SumAsQuery(this IQueryable source, - Expression> selector) + Expression> selector) { if (source == null) { @@ -2170,6 +2609,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the average of a set of Int32 values in the input + /// dataset. + /// + /// A set of Int32 values to calculate the average of + /// The average of the values in the input dataset public static IQueryable AverageAsQuery(this IQueryable source) { if (source == null) @@ -2189,6 +2635,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the average of a set of nullable Int32 values in the input + /// dataset. + /// + /// A set of nullable Int32 values to calculate the average of + /// The average of the values in the input dataset public static IQueryable AverageAsQuery(this IQueryable source) { if (source == null) @@ -2208,6 +2661,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the average of a set of Int64 values in the input + /// dataset. + /// + /// A set of Int64 values to calculate the average of + /// The average of the values in the input dataset public static IQueryable AverageAsQuery(this IQueryable source) { if (source == null) @@ -2227,6 +2687,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the average of a set of nullable Int64 values in the input + /// dataset. 
+ /// + /// A set of nullable Int64 values to calculate the average of + /// The average of the values in the input dataset public static IQueryable AverageAsQuery(this IQueryable source) { if (source == null) @@ -2246,6 +2713,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the average of a set of float values in the input + /// dataset. + /// + /// A set of float values to calculate the average of + /// The average of the values in the input dataset public static IQueryable AverageAsQuery(this IQueryable source) { if (source == null) @@ -2265,6 +2739,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the average of a set of nullable float values in the input + /// dataset. + /// + /// A set of nullable float values to calculate the average of + /// The average of the values in the input dataset public static IQueryable AverageAsQuery(this IQueryable source) { if (source == null) @@ -2284,6 +2765,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the average of a set of double values in the input + /// dataset. + /// + /// A set of double values to calculate the average of + /// The average of the values in the input dataset public static IQueryable AverageAsQuery(this IQueryable source) { if (source == null) @@ -2303,6 +2791,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the average of a set of nullable double values in the input + /// dataset. 
+ /// + /// A set of nullable double values to calculate the average of + /// The average of the values in the input dataset public static IQueryable AverageAsQuery(this IQueryable source) { if (source == null) @@ -2322,6 +2817,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the average of a set of decimal values in the input + /// dataset. + /// + /// A set of decimal values to calculate the average of + /// The average of the values in the input dataset public static IQueryable AverageAsQuery(this IQueryable source) { if (source == null) @@ -2341,6 +2843,13 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the average of a set of nullable decimal values in the input + /// dataset. + /// + /// A set of nullable decimal values to calculate the average of + /// The average of the values in the input dataset public static IQueryable AverageAsQuery(this IQueryable source) { if (source == null) @@ -2360,9 +2869,18 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the average of a set of Int32 values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The average of the values after applying the transformation function public static IQueryable AverageAsQuery(this IQueryable source, - Expression> selector) + Expression> selector) { if (source == null) { @@ -2385,6 +2903,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the average of a set of nullable Int32 values that + /// is obtained by applying a function to each element of the input dataset. 
+ /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The average of the values after applying the transformation function public static IQueryable AverageAsQuery(this IQueryable source, Expression> selector) @@ -2410,6 +2937,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the average of a set of float values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The average of the values after applying the transformation function public static IQueryable AverageAsQuery(this IQueryable source, Expression> selector) @@ -2435,6 +2971,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the average of a set of nullable float values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The average of the values after applying the transformation function public static IQueryable AverageAsQuery(this IQueryable source, Expression> selector) @@ -2460,9 +3005,18 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the average of a set of Int64 values that + /// is obtained by applying a function to each element of the input dataset. 
+ /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The average of the values after applying the transformation function public static IQueryable AverageAsQuery(this IQueryable source, - Expression> selector) + Expression> selector) { if (source == null) { @@ -2485,9 +3039,18 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the average of a set of nullable Int64 values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The average of the values after applying the transformation function public static IQueryable AverageAsQuery(this IQueryable source, - Expression> selector) + Expression> selector) { if (source == null) { @@ -2510,6 +3073,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the average of a set of double values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The average of the values after applying the transformation function public static IQueryable AverageAsQuery(this IQueryable source, Expression> selector) @@ -2535,6 +3107,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the average of a set of nullable double values that + /// is obtained by applying a function to each element of the input dataset. 
+ /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The average of the values after applying the transformation function public static IQueryable AverageAsQuery(this IQueryable source, Expression> selector) { @@ -2559,6 +3140,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. Computes the average of a set of decimal values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The average of the values after applying the transformation function public static IQueryable AverageAsQuery(this IQueryable source, Expression> selector) @@ -2584,6 +3174,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <<>> + /// containing a single element. Computes the average of a set of nullable decimal values that + /// is obtained by applying a function to each element of the input dataset. + /// + /// The type of the elements of source + /// The input dataset + /// A transformation function to apply to each element + /// The average of the values after applying the transformation function public static IQueryable AverageAsQuery(this IQueryable source, Expression> selector) @@ -2609,6 +3208,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Applies an aggregator function over a sequence. 
+ /// + /// The type of the elements of source + /// The type of the accumulator value + /// The input sequence + /// The function that creates the initial accumulator value + /// An accumulator function to apply to each element + /// The final accumulator value public static TAccumulate Aggregate(this IQueryable source, Expression> seedFunc, @@ -2634,7 +3242,17 @@ namespace Microsoft.Research.DryadLinq )); } - + /// + /// Applies an aggregator function over a sequence. + /// + /// The type of the elements of source + /// The type of the accumulator value + /// The type of final result + /// The input sequence + /// The function that creates the initial accumulator value + /// An accumulator function to apply to each element + /// A function to transform the final accumulator value into the result value + /// The result of applying selector to the accumulator value public static TResult Aggregate( this IQueryable source, @@ -2666,6 +3284,14 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The input sequence + /// An accumulator function to apply to each element + /// The final accumulator value public static IQueryable AggregateAsQuery(this IQueryable source, Expression> func) @@ -2691,6 +3317,16 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. + /// + /// The type of the elements of source + /// The type of the accumulator value + /// The input sequence + /// The initial accumulator value + /// An accumulator function to apply to each element + /// The final accumulator value public static IQueryable AggregateAsQuery(this IQueryable source, TAccumulate seed, @@ -2717,6 +3353,18 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Same as , but returns an <> + /// containing a single element. 
+ /// + /// The type of the elements of source + /// The type of the accumulator value + /// The type of the final result + /// The input sequence + /// The initial accumulator value + /// An accumulator function to apply to each element + /// A function to transform the final accumulator value into the result value + /// The result of applying selector to the accumulator value public static IQueryable AggregateAsQuery(this IQueryable source, TAccumulate seed, @@ -2753,7 +3401,7 @@ namespace Microsoft.Research.DryadLinq /// /// The type of the records of the dataset /// The type of the keys on which the partition is based - /// The dataset + /// The input dataset /// The function to extract the key from a record /// The same dataset as input public static IQueryable @@ -2781,6 +3429,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Instructs DryadLINQ to assume that the dataset is hash partitioned. + /// + /// The type of the records of the dataset + /// The type of the key on which partition is based + /// The input dataset + /// The function to extract the key from a record + /// An equality comparer to compute the hash code of a key + /// The same dataset as input public static IQueryable AssumeHashPartition(this IQueryable source, Expression> keySelector, @@ -2809,13 +3466,13 @@ namespace Microsoft.Research.DryadLinq } /// - /// Instruct DryadLINQ to assume that the dataset is range partitioned. + /// Instructs DryadLINQ to assume that the dataset is range partitioned. 
/// /// The type of the records of the dataset /// The type of the key on which partition is based - /// The dataset + /// The input dataset /// The function to extract the key from a record - /// true if the partition keys are ordered descendingly + /// true to assume the partition keys are ordered descendingly /// The same dataset as input public static IQueryable AssumeRangePartition(this IQueryable source, @@ -2843,7 +3500,17 @@ namespace Microsoft.Research.DryadLinq Expression.Constant(isDescending) } )); } - + + /// + /// Instructs DryadLINQ to assume that the dataset is range partitioned. + /// + /// The type of the records of the dataset + /// The type of the key on which partition is based + /// The input dataset + /// The function to extract the key from a record + /// An IComparer on TKey to compare keys + /// true to assume that the partition keys are descending + /// The same dataset as input public static IQueryable AssumeRangePartition(this IQueryable source, Expression> keySelector, @@ -2873,6 +3540,15 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Instructs DryadLINQ to assume that the dataset is range partitioned by a specified list of keys. + /// + /// The type of the records of the dataset + /// The type of the key on which partition is based + /// The input dataset + /// The function to extract the key from a record + /// A list of partition keys + /// The same dataset as input public static IQueryable AssumeRangePartition(this IQueryable source, Expression> keySelector, @@ -2905,6 +3581,16 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Instructs DryadLINQ to assume that the dataset is range partitioned by a specified list of keys. 
+ /// + /// The type of the records of the dataset + /// The type of the key on which partition is based + /// The input dataset + /// The function to extract the key from a record + /// An IComparer on TKey to compare keys + /// A list of partition keys + /// The same dataset as input public static IQueryable AssumeRangePartition(this IQueryable source, Expression> keySelector, @@ -2939,7 +3625,7 @@ namespace Microsoft.Research.DryadLinq } /// - /// Instruct DryadLINQ to assume that each partition of the dataset is ordered. A dataset + /// Instructs DryadLINQ to assume that each partition of the dataset is ordered. A dataset /// is ordered if it is range partitioned and each partition of it is ordered on the same /// key. /// @@ -2947,7 +3633,7 @@ namespace Microsoft.Research.DryadLinq /// The type of the key on which partition is based /// The dataset /// The function to extract the key from a record - /// true if the order is descending + /// true to assume the order is descending /// The same dataset as input public static IQueryable AssumeOrderBy(this IQueryable source, @@ -2975,7 +3661,19 @@ namespace Microsoft.Research.DryadLinq Expression.Constant(isDescending) } )); } - + + /// + /// Instructs DryadLINQ to assume that each partition of the dataset is ordered. A dataset + /// is ordered if it is range partitioned and each partition of it is ordered on the same + /// key. + /// + /// The type of the records of the dataset + /// The type of the key on which partition is based + /// The input dataset + /// The function to extract the key from a record + /// An IComparer on TKey to compare keys + /// true to assume the order is descending + /// The same dataset as input public static IQueryable AssumeOrderBy(this IQueryable source, Expression> keySelector, @@ -3005,6 +3703,16 @@ namespace Microsoft.Research.DryadLinq )); } + /// + /// Forks a specified input dataset into two datasets. 
A specified user-defined function is + /// applied to each partition of the input dataset to produce a sequence of ForkTuples. + /// + /// The type of the elements of source + /// The element type of the first output dataset + /// The element type of the second output dataset + /// The input dataset + /// The function to apply to each partition of the input dataset + /// An IMultiQueryable exposing two output datasets public static IMultiQueryable Fork(this IQueryable source, Expression, IEnumerable>>> mapper) @@ -3027,7 +3735,18 @@ namespace Microsoft.Research.DryadLinq ); return new MultiQueryable(source, expr); } - + + /// + /// Forks a specified input dataset into three datasets. A specified user-defined function is + /// applied to each partition of the input dataset to produce a sequence of ForkTuples. + /// + /// The type of the elements of source + /// The element type of the first output dataset + /// The element type of the second output dataset + /// The element type of the third output dataset + /// The input dataset + /// The function to apply to each partition of the input dataset + /// An IMultiQueryable exposing three output datasets public static IMultiQueryable Fork(this IQueryable source, Expression, IEnumerable>>> mapper) @@ -3052,14 +3771,15 @@ namespace Microsoft.Research.DryadLinq } /// - /// Compute two output datasets from one input dataset. + /// Compute two output datasets from one input dataset. A specified user-defined function is + /// applied to each input element to produce zero or one element for each output dataset. 
/// /// The type of records of input dataset /// The type of records of first output dataset /// The type of records of second output dataset /// The input dataset /// The function applied to each record of the input - /// An IMultiQueryable for the two output dataset + /// An IMultiQueryable for the two output datasets public static IMultiQueryable Fork(this IQueryable source, Expression>> mapper) @@ -3082,7 +3802,18 @@ namespace Microsoft.Research.DryadLinq ); return new MultiQueryable(source, expr); } - + + /// + /// Forks one input dataset into three output datasets. A specified user-defined function is + /// applied to each input element to produce zero or one element for each output dataset. + /// + /// The type of records of input dataset + /// The type of records of the first output dataset + /// The type of records of the second output dataset + /// The type of records of the third output dataset + /// The input dataset + /// The function applied to each record of the input + /// An IMultiQueryable for the three output datasets public static IMultiQueryable Fork(this IQueryable source, Expression>> mapper) @@ -3106,6 +3837,17 @@ namespace Microsoft.Research.DryadLinq return new MultiQueryable(source, expr); } + /// + /// Divides the input dataset into a collection of datasets based on the keys of the records. + /// The method produces one output dataset for each key in the specified key array. Input + /// records that don't match any of the keys are dropped. + /// + /// The type of records of input dataset + /// The type of the keys of the input records + /// The input dataset + /// The function to extract the key from a record + /// A list of the partition keys + /// An IKeyedMultiQueryable for the output datasets. public static IKeyedMultiQueryable Fork(this IQueryable source, Expression> keySelector, @@ -3153,16 +3895,14 @@ namespace Microsoft.Research.DryadLinq } /// - /// Specifies a stream URI to be populated with data during query execution. 
+ /// Specifies a stream URI to be populated with the result of a specified DryadLINQ query. /// /// The type of the records of the table /// The data source - /// A DSC stream name + /// A stream name + /// If this flag is true, delete the output stream + /// if it already exists before execution /// A query representing the output data. - - // Note: for both cluster&LocalDebug, we add a node to the query-tree - // Submit/Materialize will process the ToDsc call in both cases. - // This is good for consistency of LocalDebug & Cluser modes -- ie ToDsc is lazy in both cases. public static IQueryable ToStore(this IQueryable source, string streamName, bool deleteIfExists = false) { @@ -3170,21 +3910,20 @@ namespace Microsoft.Research.DryadLinq } /// - /// Specifies a stream URI to be populated with data during query execution. + /// Specifies a stream URI to be populated with the result of a specified DryadLINQ query. /// /// The type of the records of the table /// The data source - /// A DSC stream name + /// The stream name to store the result + /// If this flag is true, delete the output stream + /// if it already exists before execution /// A query representing the output data. - - // Note: for both cluster&LocalDebug, we add a node to the query-tree - // Submit/Materialize will process the ToDsc call in both cases. - // This is good for consistency of LocalDebug & Cluser modes -- ie ToDsc is lazy in both cases. 
public static IQueryable ToStore(this IQueryable source, Uri streamName, bool deleteIfExists = false) { DryadLinqContext context = DryadLinqContext.GetContext(source.Provider); DataProvider dataProvider = DataProvider.GetDataProvider(streamName.Scheme); + streamName = dataProvider.RewriteUri(context, streamName, FileAccess.Write); dataProvider.CheckExistence(context, streamName, deleteIfExists); return ToStoreInternal(source, streamName, false); } @@ -3215,9 +3954,6 @@ namespace Microsoft.Research.DryadLinq } DryadLinqContext context = DryadLinqContext.GetContext(source.Provider); - string prefix = DataPath.GetScheme(streamName); - DataProvider dataProvider = DataProvider.GetDataProvider(prefix); - streamName = dataProvider.RewriteUri(context, streamName); IQueryable result = source.Provider.CreateQuery( Expression.Call( null, @@ -3230,7 +3966,7 @@ namespace Microsoft.Research.DryadLinq } /// - /// Submits the query for asynchronous execution. + /// Submits a specified query for asynchronous execution. /// /// The type of the records of the table /// The data source @@ -3267,7 +4003,7 @@ namespace Microsoft.Research.DryadLinq } else { - // visited by LocalDebug: q-nonToDsc.Submit(); + // visited by LocalDebug: q-nonToStore.Submit(); Uri dataSetUri = context.MakeTemporaryStreamUri(); CompressionScheme compressionScheme = context.IntermediateDataCompressionScheme; DryadLinqMetaData metadata @@ -3358,7 +4094,7 @@ namespace Microsoft.Research.DryadLinq } /// - /// Submits the query and then waits for the job to complete + /// Submits a specified query and then waits for the job to complete /// /// If the job completes in error or is cancelled. /// If repeated errors occur while polling for status. @@ -3373,14 +4109,10 @@ namespace Microsoft.Research.DryadLinq } /// - /// Submits a collection of DryadLINQ queries for execution. + /// Submits a list of DryadLINQ queries for asynchronous execution. /// /// Queries to execute. 
/// Job information for tracking the execution. - /// - /// Every item in sources must be an DryadLINQ IQueryable object that terminates with ToDsc() - /// Only one job will be executed, but the job will produce the output associated with each item in sources. - /// public static DryadLinqJobInfo Submit(params IQueryable[] sources) { if (sources == null) @@ -3526,13 +4258,12 @@ namespace Microsoft.Research.DryadLinq } /// - /// Submits a collection of DryadLinq queries for execution and waits for the job to complete/ + /// Submits a list of DryadLinq queries for execution and waits for the job to complete /// /// If the job completes in error or is cancelled. /// If repeated errors occur while polling for status. - /// The type of the records of the table - /// The data source - /// Information about the execution job. + /// A set of DryadLINQ queries to execute + /// Information about the job being submitted for execution. /// /// Every item in sources must be an DryadLinq IQueryable object that terminates with ToStore() /// Only one job will be executed, but the job will produce the output associated with each item in sources. diff --git a/LinqToDryad/DryadLinqRecordReader.cs b/LinqToDryad/DryadLinqRecordReader.cs index e91fab9..f521acc 100644 --- a/LinqToDryad/DryadLinqRecordReader.cs +++ b/LinqToDryad/DryadLinqRecordReader.cs @@ -28,6 +28,8 @@ using System.Data.SqlTypes; using System.Diagnostics; using Microsoft.Research.DryadLinq; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { // This class defines the abstraction of reading Dryad records. 
diff --git a/LinqToDryad/DryadLinqRecordWriter.cs b/LinqToDryad/DryadLinqRecordWriter.cs index 049c436..8e23ed2 100644 --- a/LinqToDryad/DryadLinqRecordWriter.cs +++ b/LinqToDryad/DryadLinqRecordWriter.cs @@ -28,6 +28,8 @@ using System.Data.SqlTypes; using System.Diagnostics; using Microsoft.Research.DryadLinq; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { // This class defines the abstraction of writing DryadLinq records. diff --git a/LinqToDryad/DryadLinqSampler.cs b/LinqToDryad/DryadLinqSampler.cs index 412ffeb..1ee6573 100644 --- a/LinqToDryad/DryadLinqSampler.cs +++ b/LinqToDryad/DryadLinqSampler.cs @@ -29,6 +29,8 @@ using System.Linq; using System.Diagnostics; using Microsoft.Research.DryadLinq; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { public static class DryadLinqSampler diff --git a/LinqToDryad/DryadLinqSerialization.cs b/LinqToDryad/DryadLinqSerialization.cs index 30a578a..8ff4c99 100644 --- a/LinqToDryad/DryadLinqSerialization.cs +++ b/LinqToDryad/DryadLinqSerialization.cs @@ -32,15 +32,32 @@ using Microsoft.Research.DryadLinq; namespace Microsoft.Research.DryadLinq { - // If a class T implements DryadLinqSerializer, DryadLinq will use the - // read and write methods of the class to do serialization. + /// + /// The interface for providing user-defined serialization for a .NET type. + /// If a class T implements DryadLinqSerializer{T}, DryadLinq will use the + /// read and write methods of the class to do serialization. + /// + /// The .NET type to be serialized. public interface IDryadLinqSerializer { + /// + /// Reads a record of type T from the specified reader. + /// + /// The reader to read from. + /// A record of type T T Read(DryadLinqBinaryReader reader); + + /// + /// Writes a record of type T to the specified writer. + /// + /// The writer to write to. + /// The record to write. 
void Write(DryadLinqBinaryWriter writer, T x); } } +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { public abstract class DryadLinqSerializer : IDryadLinqSerializer diff --git a/LinqToDryad/DryadLinqTextReader.cs b/LinqToDryad/DryadLinqTextReader.cs index c5efe59..86820ac 100644 --- a/LinqToDryad/DryadLinqTextReader.cs +++ b/LinqToDryad/DryadLinqTextReader.cs @@ -28,6 +28,10 @@ using Microsoft.Research.DryadLinq; namespace Microsoft.Research.DryadLinq.Internal { + /// + /// The DryadLINQ class to read texts from a native stream. + /// + /// A DryadLINQ user should not need to use this class directly. public unsafe sealed class DryadLinqTextReader { // The number of bytes we attempt to decode each time @@ -45,11 +49,20 @@ namespace Microsoft.Research.DryadLinq.Internal private Int32 m_curLineEnd; // offset of line end in charBuff private bool m_isClosed; + /// + /// Initializes an instance of DryadLinqTextReader with encoding UTF8. + /// + /// A native stream to read from. public DryadLinqTextReader(NativeBlockStream stream) : this(stream, Encoding.UTF8) { } + /// + /// Initializes an instance of DryadLinqTextReader. + /// + /// A native stream to read from. + /// The text encoding. public DryadLinqTextReader(NativeBlockStream stream, Encoding encoding) { this.m_nativeStream = stream; @@ -67,16 +80,30 @@ namespace Microsoft.Research.DryadLinq.Internal this.m_isClosed = false; } + /// + /// Initializes an instance of DryadLinqTextReader with encoding UTF8. + /// + /// A native handle for Dryad vertex. + /// A port number that specifies a Dryad channel. public DryadLinqTextReader(IntPtr vertexInfo, UInt32 portNum) : this(new DryadLinqChannel(vertexInfo, portNum, true), Encoding.UTF8) { } + /// + /// Initializes an instance of DryadLinqTextReader. + /// + /// A native handle for Dryad vertex. + /// A port number that specifies a Dryad channel. + /// The text encoding. 
public DryadLinqTextReader(IntPtr vertexInfo, UInt32 portNum, Encoding encoding) : this(new DryadLinqChannel(vertexInfo, portNum, true), encoding) { } + /// + /// The finalizer that frees native resources. + /// ~DryadLinqTextReader() { if (!this.m_isClosed) @@ -88,6 +115,10 @@ namespace Microsoft.Research.DryadLinq.Internal } } + /// + /// Gets the length in bytes of the native stream. + /// + /// A 64-bit integer. public Int64 GetTotalLength() { return this.m_nativeStream.GetTotalLength(); @@ -171,12 +202,19 @@ namespace Microsoft.Research.DryadLinq.Internal this.m_curBlockPos = 0; } + /// + /// Moves to the next line. + /// + /// True if there is a next line. public bool MoveNext() { return (this.m_curLineEnd < this.m_charBuffEnd || this.FillCharBuffer() > 0); } - // Reads a line of characters and returns as a string. Returns null if EOF. + /// + /// Reads a line of text from the reader. + /// + /// A string. Returns null if EOF. public string ReadLine() { Debug.Assert(this.m_curLineStart == this.m_curLineEnd); @@ -212,6 +250,9 @@ namespace Microsoft.Research.DryadLinq.Internal return lastLine; } + /// + /// Closes the native stream and frees native resources. + /// public void Close() { if (!this.m_isClosed) @@ -228,6 +269,10 @@ namespace Microsoft.Research.DryadLinq.Internal return this.m_nativeStream.GetURI(); } + /// + /// Returns a string that represents this DryadLinqTextReader object. + /// + /// The string representation. public override string ToString() { return this.m_nativeStream.ToString(); diff --git a/LinqToDryad/DryadLinqTextWriter.cs b/LinqToDryad/DryadLinqTextWriter.cs index 2b67453..4960a90 100644 --- a/LinqToDryad/DryadLinqTextWriter.cs +++ b/LinqToDryad/DryadLinqTextWriter.cs @@ -28,6 +28,10 @@ using Microsoft.Research.DryadLinq; namespace Microsoft.Research.DryadLinq.Internal { + /// + /// The DryadLINQ class to write texts to a native stream. + /// + /// A DryadLINQ user should not need to use this class directly. 
public unsafe sealed class DryadLinqTextWriter { private const int DefaultBlockSize = 256 * 1024; @@ -47,16 +51,31 @@ namespace Microsoft.Research.DryadLinq.Internal private bool m_isClosed; private bool m_isASCIIOrUTF8; + /// + /// Initializes an instance of DryadLinqTextWriter with encoding UTF8. + /// + /// A native stream to write to. public DryadLinqTextWriter(NativeBlockStream stream) : this(stream, Encoding.UTF8) { } + /// + /// Initializes an instance of DryadLinqTextWriter. + /// + /// A native stream to write to. + /// The text encoding. public DryadLinqTextWriter(NativeBlockStream stream, Encoding encoding) : this(stream, encoding, DefaultBlockSize) { } - + + /// + /// Initializes an instance of DryadLinqTextWriter. + /// + /// A native stream to write to. + /// The text encoding. + /// A hint for the size of write buffer. public DryadLinqTextWriter(NativeBlockStream stream, Encoding encoding, Int32 buffSize) { this.m_nativeStream = stream; @@ -75,22 +94,41 @@ namespace Microsoft.Research.DryadLinq.Internal this.m_isClosed = false; this.m_isASCIIOrUTF8 = (encoding == Encoding.UTF8 || encoding == Encoding.ASCII); } - + + /// + /// Initializes an instance of DryadLinqTextWriter with encoding UTF8. + /// + /// A native handle for Dryad vertex. + /// A port number that specifies a Dryad channel. + /// A hint for the size of write buffer. public DryadLinqTextWriter(IntPtr vertexInfo, UInt32 portNum, Int32 buffSize) : this(new DryadLinqChannel(vertexInfo, portNum, false), Encoding.UTF8, buffSize) { } + /// + /// Initializes an instance of DryadLinqTextWriter. + /// + /// A native handle for Dryad vertex. + /// A port number that specifies a Dryad channel. + /// The text encoding. + /// A hint for the size of write buffer. 
public DryadLinqTextWriter(IntPtr vertexInfo, UInt32 portNum, Encoding encoding, Int32 buffSize) : this(new DryadLinqChannel(vertexInfo, portNum, false), encoding, buffSize) { } + /// + /// The finalizer that frees native resources. + /// ~DryadLinqTextWriter() { this.Close(); } + /// + /// A hint for the size of write buffer. + /// public Int32 BufferSizeHint { get { return this.m_bufferSizeHint; } @@ -115,12 +153,20 @@ namespace Microsoft.Research.DryadLinq.Internal return this.m_nativeStream.GetFingerPrint(); } + /// + /// Gets and sets a flag indicating whether to calculate the fingerprint of the content of the writer. + /// public bool CalcFP { get { return this.m_calcFP; } set { this.m_calcFP = value; } } + /// + /// Writes a specified line of text to the writer. + /// + /// The line to write. + /// The number of bytes used to represent the line. public unsafe int WriteLine(string line) { Int32 strLen = line.Length; @@ -163,6 +209,9 @@ namespace Microsoft.Research.DryadLinq.Internal return numBytes + numBytes1; } + /// + /// Flushes the current write buffer. + /// public void Flush() { Debug.Assert(this.m_curLineStart == this.m_curLineEnd); @@ -182,6 +231,9 @@ namespace Microsoft.Research.DryadLinq.Internal this.m_nativeStream.Flush(); } + /// + /// Flushes the write buffer and closes the writer. + /// public void Close() { if (!this.m_isClosed) @@ -230,6 +282,9 @@ namespace Microsoft.Research.DryadLinq.Internal this.m_curBlockSize = newDataBlockInfo.BlockSize; } + /// + /// The size in bytes of the current content of the writer. + /// public Int64 Length { get { @@ -237,6 +292,10 @@ namespace Microsoft.Research.DryadLinq.Internal } } + /// + /// Returns a string that represents this DryadLinqTextWriter object. + /// + /// The string representation. 
public override string ToString() { return this.m_nativeStream.ToString(); diff --git a/LinqToDryad/DryadLinqVertex.cs b/LinqToDryad/DryadLinqVertex.cs index c00567d..6daff4b 100644 --- a/LinqToDryad/DryadLinqVertex.cs +++ b/LinqToDryad/DryadLinqVertex.cs @@ -38,11 +38,16 @@ using System.Collections.Concurrent; using System.Threading.Tasks; using Microsoft.Research.DryadLinq; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { - // This class contains the generic vertex code for each query operation - // supported by DryadLinq. We hope to support most of the LINQ operators. - // Applications can add their own vertex method using extension methods. + /// + /// This class provides the generic vertex runtime for the query operators + /// supported by DryadLINQ. The auto-generated vertex code uses the methods + /// in this class extensively. + /// + /// A DryadLINQ user should not need to use DryadLinqVertex directly. public static class DryadLinqVertex { public static bool s_multiThreading = true; //vertex code will set this at runtime. @@ -1302,6 +1307,8 @@ namespace Microsoft.Research.DryadLinq.Internal /// Type of elements to union. /// Left sorted stream to union. /// Right sorted stream to union. + /// true if both streams are ordered in descending order; + /// otherwise they are in ascending order. /// The union of all elements, in sorted order. public static IEnumerable OrderedUnion(IEnumerable source1, @@ -1318,6 +1325,8 @@ namespace Microsoft.Research.DryadLinq.Internal /// Left sorted stream to union. /// Right sorted stream to union. /// Comparison function to use for TSource. + /// true if both streams are ordered in descending order; + /// otherwise they are in ascending order. /// The union of all elements, in sorted order. public static IEnumerable OrderedUnion(IEnumerable source1, @@ -1446,6 +1455,8 @@ namespace Microsoft.Research.DryadLinq.Internal /// Type of elements to intersect. /// Left sorted stream of values. 
/// Right sorted stream of values. + /// true if both streams are ordered in descending order; + /// otherwise they are in ascending order. /// public static IEnumerable OrderedIntersect(IEnumerable source1, @@ -1462,6 +1473,8 @@ namespace Microsoft.Research.DryadLinq.Internal /// Left sorted stream of values. /// Right sorted stream of values. /// Comparison function to use. + /// true if both streams are ordered in descending order; + /// otherwise they are in ascending order. /// public static IEnumerable OrderedIntersect(IEnumerable source1, @@ -1559,6 +1572,8 @@ namespace Microsoft.Research.DryadLinq.Internal /// Type of elements to compare. /// Sorted stream from which subtraction occurs. /// Subtracted sorted stream. + /// true if both streams are ordered in descending order; + /// otherwise, they are in ascending order. /// Elements in left steram not ocurring in right stream. public static IEnumerable OrderedExcept(IEnumerable source1, @@ -1575,6 +1590,8 @@ namespace Microsoft.Research.DryadLinq.Internal /// Sorted stream from which subtraction occurs. /// Subtracted sorted stream. /// Function to use for comparison testing. + /// true if both streams are ordered in descending order; + /// otherwise, they are in ascending order. /// Elements in left steram not ocurring in right stream. public static IEnumerable OrderedExcept(IEnumerable source1, diff --git a/LinqToDryad/DryadLinqVertexParams.cs b/LinqToDryad/DryadLinqVertexParams.cs index 99b6ed5..050a12b 100644 --- a/LinqToDryad/DryadLinqVertexParams.cs +++ b/LinqToDryad/DryadLinqVertexParams.cs @@ -25,6 +25,9 @@ using System.Text; namespace Microsoft.Research.DryadLinq.Internal { + /// + /// Compile-time static parameters for DryadLinq vertex. 
+ /// public class DryadLinqVertexParams { private Int32 m_inputArity; @@ -36,6 +39,11 @@ namespace Microsoft.Research.DryadLinq.Internal private string m_remoteArch; private bool m_multiThreading; + /// + /// Initializes a new instance of the DryadLinqVertexParams class. + /// + /// The number of inputs. + /// The number of outputs. public DryadLinqVertexParams(int inputArity, int outputArity) { this.m_inputArity = inputArity; @@ -44,52 +52,86 @@ namespace Microsoft.Research.DryadLinq.Internal this.m_keepInputPortOrder = new bool[inputArity]; } + /// + /// Sets the parameters for a specified input. + /// + /// The index of the input. + /// The number of ports for the given input. + /// true to preserve the port ordering when reading. public void SetInputParams(int index, UInt32 portCount, bool keepPortOrder) { this.m_inputPortCounts[index] = portCount; this.m_keepInputPortOrder[index] = keepPortOrder; } + /// + /// Gets and sets a user friendly name for a vertex stage. + /// public string VertexStageName { get { return this.m_vertexStageName; } set { this.m_vertexStageName = value; } } + /// + /// Gets and sets the number of inputs. + /// public int InputArity { get { return this.m_inputArity; } set { this.m_inputArity = value; } } + /// + /// Gets and sets the number of outputs. + /// public int OutputArity { get { return this.m_outputArity; } set { this.m_outputArity = value; } } + /// + /// Gets and sets the arch flavor of the cluster node. + /// public string RemoteArch { get { return this.m_remoteArch; } set { this.m_remoteArch = value; } } + /// + /// Gets and sets the buffering policy for output. + /// public bool UseLargeBuffer { get { return this.m_useLargeBuffer; } set { this.m_useLargeBuffer = value; } } + /// + /// Determines if port ordering needs to be preserved for reading from a specified input. + /// + /// The index of the input. + /// true to preserve port order for a specified input. 
public bool KeepInputPortOrder(UInt32 index) { return this.m_keepInputPortOrder[index]; } + /// + /// Returns the number of ports for a specified input. + /// + /// The index of the input. + /// The number of ports. public UInt32 InputPortCount(UInt32 index) { return this.m_inputPortCounts[index]; } + /// + /// Determines if multi-threading is enabled for DryadLINQ vertex runtime. + /// public bool MultiThreading { get { return this.m_multiThreading; } diff --git a/LinqToDryad/DryadLinqVertexReader.cs b/LinqToDryad/DryadLinqVertexReader.cs index 848f56b..5c7eea7 100644 --- a/LinqToDryad/DryadLinqVertexReader.cs +++ b/LinqToDryad/DryadLinqVertexReader.cs @@ -28,10 +28,16 @@ using System.Linq.Expressions; using System.Linq; using Microsoft.Research.DryadLinq; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { - // The class encapsulates the external environment in which a - // managed query operator reads from Dryad channels. + /// + /// The class encapsulates the external environment in which a managed + /// DryadLINQ vertex reads from input channels. + /// + /// The record type. + /// A DryadLINQ user should not use DryadLinqVertexReader directly. public class DryadLinqVertexReader : IMultiEnumerable { private VertexEnv m_dvertexEnv; diff --git a/LinqToDryad/DryadLinqVertexWriter.cs b/LinqToDryad/DryadLinqVertexWriter.cs index 56309a7..caf5402 100644 --- a/LinqToDryad/DryadLinqVertexWriter.cs +++ b/LinqToDryad/DryadLinqVertexWriter.cs @@ -27,10 +27,16 @@ using System.Linq.Expressions; using System.Linq; using Microsoft.Research.DryadLinq; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { - // The class encapsulates the external environment in which a managed - // query operator writes to DryadLinq channels. + /// + /// The class encapsulates the external environment in which a managed + /// DryadLINQ vertex writes to output channels. + /// + /// The record type.
+ /// A DryadLINQ user should not use DryadLinqVertexWriter directly. public class DryadLinqVertexWriter { private VertexEnv m_dvertexEnv; diff --git a/LinqToDryad/DscClientHelper.cs b/LinqToDryad/DscClientHelper.cs deleted file mode 100644 index fb0251a..0000000 --- a/LinqToDryad/DscClientHelper.cs +++ /dev/null @@ -1,553 +0,0 @@ -/* -Copyright (c) Microsoft Corporation - -All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License -at http://www.apache.org/licenses/LICENSE-2.0 - - -THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER -EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF -TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. - - -See the Apache Version 2.0 License for specific language governing permissions and -limitations under the License. - -*/ - -using System; -using System.Text; -using System.IO; -using System.Runtime.InteropServices; -using System.Diagnostics; -using System.IO.Compression; -using System.Collections; -using System.Collections.Generic; -using Microsoft.Win32.SafeHandles; -using Microsoft.Research.DryadLinq; - -namespace Microsoft.Research.DryadLinq.Internal -{ - internal class DscIOStream : System.IO.Stream - { - private DscService m_dscClient; - private string m_fileSetName; - private FileAccess m_mode; - private FileStream m_fstream; - private DscFileSet m_dscFileSet; - private IEnumerator m_dscFileEnumerator; - private ulong size = 0; - private bool m_atEOF; - private CompressionScheme m_compressionScheme; - - public DscIOStream(string streamName, FileAccess access, CompressionScheme compressionScheme) - { - if (String.IsNullOrEmpty(streamName)) - { - throw new ArgumentNullException("streamName"); - } - - Uri streamUri = new Uri(streamName); - this.m_dscClient = new DscService(streamUri.Host); - 
this.m_fileSetName = streamUri.LocalPath; - this.m_mode = access; - this.m_fstream = null; - this.m_atEOF = false; - this.m_compressionScheme = compressionScheme; - - if (access == FileAccess.Read) - { - this.m_dscFileSet = this.m_dscClient.GetFileSet(streamName); - this.m_dscFileEnumerator = this.m_dscFileSet.GetFiles().GetEnumerator(); - } - else if (access == FileAccess.Write) - { - this.m_dscFileSet = this.m_dscClient.CreateFileSet(streamName, compressionScheme); - } - else - { - throw new ArgumentException(SR.ReadWriteNotSupported, "access"); - } - } - - public DscIOStream(string streamName, - FileAccess access, - FileMode createMode, - CompressionScheme compressionScheme) - { - if (String.IsNullOrEmpty(streamName)) - { - throw new ArgumentNullException("streamName"); - } - - Uri streamUri = new Uri(streamName); - this.m_dscClient = new DscService(streamUri.Host); - this.m_fileSetName = streamUri.LocalPath.TrimStart('/'); - this.m_mode = access; - this.m_compressionScheme = compressionScheme; - - bool streamExists = this.m_dscClient.FileSetExists(this.m_fileSetName); - - if (access == FileAccess.Read) - { - switch (createMode) - { - case FileMode.Open: - case FileMode.OpenOrCreate: - { - if (!streamExists) - { - throw new FileNotFoundException(String.Format(SR.StreamDoesNotExist , streamName)); - } - break; - } - case FileMode.Append: - case FileMode.Create: - case FileMode.CreateNew: - case FileMode.Truncate: - { - throw new NotSupportedException(); - } - default: - { - throw new NotSupportedException(); - } - } - - this.m_dscFileSet = this.m_dscClient.GetFileSet(streamName); - this.m_dscFileEnumerator = this.m_dscFileSet.GetFiles().GetEnumerator(); - } - else if (access == FileAccess.Write) - { - switch (createMode) - { - case FileMode.Append: - if (!streamExists) - { - this.m_dscFileSet = this.m_dscClient.CreateFileSet(this.m_fileSetName, this.m_compressionScheme); - } - break; - case FileMode.Create: - if (streamExists) - { - 
this.m_dscClient.DeleteFileSet(this.m_fileSetName); - } - this.m_dscFileSet = this.m_dscClient.CreateFileSet(this.m_fileSetName, this.m_compressionScheme); - break; - case FileMode.CreateNew: - if (streamExists) - { - throw new IOException(String.Format(SR.StreamAlreadyExists, streamName)); - } - break; - case FileMode.Truncate: - if (streamExists) - { - this.m_dscClient.DeleteFileSet(this.m_fileSetName); - } - this.m_dscFileSet = this.m_dscClient.CreateFileSet(this.m_fileSetName, this.m_compressionScheme); - break; - case FileMode.Open: - case FileMode.OpenOrCreate: // TODO: this should be dealt with correctly, - // although it's not obvious what open should do - throw new NotSupportedException("'" + createMode.ToString() + "' not supported"); - } - } - else - { - throw new ArgumentException(SR.ReadWriteNotSupported, "access"); - } - - this.m_fstream = null; - this.m_atEOF = false; - } - - public override bool CanRead - { - get { - return this.m_mode == FileAccess.Read; - } - } - - public override bool CanSeek - { - get { return false; } - } - - public override bool CanWrite - { - get { - return this.m_mode == FileAccess.Write; - } - } - - public override void Close() - { - try - { - if (this.m_fstream != null && this.m_mode == FileAccess.Write) - { - this.SealPartition(); - } - } - finally - { - this.m_dscClient.Close(); - } - } - - public override void Flush() - { - if (this.m_fstream != null) - { - this.m_fstream.Flush(); - } - } - - public override long Length - { - get { throw new NotImplementedException(); } - } - - private void OpenForRead() - { - Debug.Assert(this.m_fstream == null); - - if (this.m_dscFileEnumerator.MoveNext()) - { - // TODO(bug 15879): Should failover to other readpath on failure if available - string path = this.m_dscFileEnumerator.Current.ReadPaths[0]; - this.m_fstream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4 * 65536, false); - } - else - { - this.m_atEOF = true; - } - } - - private void OpenForWrite(bool 
synchronously) - { - if (this.m_fstream != null) - { - throw new InvalidOperationException(); - } - - // @@TODO: Should try to estimate size - DscFile dscFile = this.m_dscFileSet.AddNewFile(1); - this.m_fstream = new FileStream(dscFile.WritePath, FileMode.Create, FileAccess.Write, - FileShare.None, 4 * 65536, synchronously); - } - - internal void SealPartition() - { - if (this.m_fstream != null) - { - this.m_fstream.Close(); - this.m_fstream = null; - - this.m_dscFileSet.Seal(); - } - } - - public override long Position - { - get - { - throw new NotImplementedException(); - } - set - { - throw new NotImplementedException(); - } - } - - public override int Read(byte[] buffer, int offset, int count) - { - if (this.m_mode == FileAccess.Write) - { - throw new DryadLinqException(DryadLinqErrorCode.AttemptToReadFromAWriteStream, - SR.AttemptToReadFromAWriteStream); - } - int totalBytesRead = 0; - while (totalBytesRead < count && !this.m_atEOF) - { - if (this.m_fstream == null) - { - this.OpenForRead(); - if (this.m_atEOF) - { - break; // we hit EOF (EOS, really), so fall out of the loop - } - } - int bytesRead = this.m_fstream.Read(buffer, offset + totalBytesRead, count - totalBytesRead); - totalBytesRead += bytesRead; - if (bytesRead == 0) - { - this.m_fstream.Close(); - this.m_fstream = null; - } - } - return totalBytesRead; - } - - internal unsafe int Read(byte* buffer, int bufferSize) - { - int totalBytesRead = 0; - do - { - SafeFileHandle handle; - if (this.m_fstream == null) - { - this.OpenForRead(); - } - if (this.m_atEOF) break; - - handle = this.m_fstream.SafeFileHandle; - int size = 0; - Int32* pBlockSize = &size; - bool success = DryadLinqNative.ReadFile(handle, buffer, (UInt32)bufferSize, (IntPtr)pBlockSize, null); - if (!success) - { - throw new DryadLinqException(DryadLinqErrorCode.ReadFileError, - String.Format(SR.ReadFileError, Marshal.GetLastWin32Error())); - } - totalBytesRead += size; - - if (size == 0) - { - this.m_fstream.Close(); - this.m_fstream = 
null; - } - } while (totalBytesRead == 0 && !this.m_atEOF); - - return totalBytesRead; - } - - public override long Seek(long offset, SeekOrigin origin) - { - throw new NotSupportedException(); - } - - public override void SetLength(long value) - { - throw new NotSupportedException(); - } - - public unsafe int Write(byte* buffer, int offset, int count) - { - if (this.m_mode == FileAccess.Read) - { - throw new DryadLinqException(DryadLinqErrorCode.AttemptToReadFromAWriteStream, - SR.AttemptToReadFromAWriteStream); - } - if (this.m_fstream == null) - { - this.OpenForWrite(false); - } - - SafeFileHandle handle = this.m_fstream.SafeFileHandle; - int size; - Int32* pBlockSize = &size; - - bool success = DryadLinqNative.WriteFile(handle, buffer, (UInt32)count, (IntPtr)pBlockSize, null); - if (!success) - { - throw new DryadLinqException(DryadLinqErrorCode.WriteFileError, - String.Format(SR.WriteFileError, Marshal.GetLastWin32Error())); - } - - this.size += (ulong)size; - return size; - } - - public override void Write(byte[] buffer, int offset, int count) - { - if (this.m_mode == FileAccess.Read) - { - throw new DryadLinqException(DryadLinqErrorCode.AttemptToReadFromAWriteStream, - SR.AttemptToReadFromAWriteStream); - } - if (this.m_fstream == null) - { - this.OpenForWrite(true); - } - this.size += (ulong)count; - this.m_fstream.Write(buffer, offset, count); - } - } - - /// - /// Handle interaction between DryadLINQ serialization and DSC streams. 
- /// - internal unsafe class DscBlockStream : NativeBlockStream - { - private const int DefaultBuffSize = 8192*32; - - private DscIOStream m_dscStream; - private CompressionScheme m_compressionScheme; - private bool m_isClosed; - private Stream m_compressStream; - - public DscBlockStream(DscIOStream dscStream, CompressionScheme scheme) - { - this.m_dscStream = dscStream; - this.m_compressionScheme = scheme; - this.m_isClosed = false; - this.m_compressStream = null; - } - - private void Initialize(string filePath, - FileMode mode, - FileAccess access, - CompressionScheme scheme) - { - try - { - this.m_dscStream = new DscIOStream(filePath, access, mode, scheme); - } - catch (Exception e) - { - throw new DryadLinqException(DryadLinqErrorCode.FailedToCreateStream, - String.Format(SR.FailedToCreateStream, filePath), e); - } - this.m_isClosed = false; - this.m_compressionScheme = scheme; - this.m_compressStream = null; - } - - public DscBlockStream(string filePath, FileAccess access, CompressionScheme scheme) - { - FileMode mode = (access == FileAccess.Read) ? 
FileMode.Open : FileMode.OpenOrCreate; - this.Initialize(filePath, mode, access, scheme); - } - - public DscBlockStream(string filePath, FileMode mode, FileAccess access, CompressionScheme scheme) - { - this.Initialize(filePath, mode, access, scheme); - } - - internal override Int64 GetTotalLength() - { - return (Int64)this.m_dscStream.Length; - } - - internal override DataBlockInfo ReadDataBlock() - { - DataBlockInfo blockInfo; - blockInfo.DataBlock = (byte*)Marshal.AllocHGlobal(DefaultBuffSize); - blockInfo.ItemHandle = (IntPtr)blockInfo.DataBlock; - if (this.m_compressionScheme == CompressionScheme.None) - { - blockInfo.BlockSize = this.m_dscStream.Read(blockInfo.DataBlock, DefaultBuffSize); - } - else - { - if (this.m_compressStream == null) - { - if (this.m_compressionScheme == CompressionScheme.Gzip) - { - this.m_compressStream = new GZipStream(this.m_dscStream, CompressionMode.Decompress); - } - else - { - throw new DryadLinqException(DryadLinqErrorCode.UnknownCompressionScheme, - SR.UnknownCompressionScheme); - } - } - // YY: Made an extra copy here. Could do better. 
- byte[] buffer = new byte[DefaultBuffSize]; - blockInfo.BlockSize = this.m_compressStream.Read(buffer, 0, DefaultBuffSize); - fixed (byte* pBuffer = buffer) - { - DryadLinqUtil.memcpy(pBuffer, blockInfo.DataBlock, blockInfo.BlockSize); - } - } - - return blockInfo; - } - - internal override unsafe bool WriteDataBlock(IntPtr itemHandle, Int32 numBytesToWrite) - { - byte* dataBlock = (byte*)itemHandle; - if (this.m_compressionScheme == CompressionScheme.None) - { - Int32 numBytesWritten = 0; - Int32 remainingBytes = numBytesToWrite; - - while (remainingBytes > 0) - { - numBytesWritten = this.m_dscStream.Write(dataBlock, 0, remainingBytes); - dataBlock += numBytesWritten; - remainingBytes -= numBytesWritten; - } - } - else - { - if (this.m_compressStream == null) - { - if (this.m_compressionScheme == CompressionScheme.Gzip) - { - this.m_compressStream = new GZipStream(this.m_dscStream, CompressionMode.Compress); - } - else - { - throw new DryadLinqException(DryadLinqErrorCode.UnknownCompressionScheme, - SR.UnknownCompressionScheme); - } - } - // YY: Made an extra copy here. Could do better. 
- byte[] buffer = new byte[numBytesToWrite]; - fixed (byte* pBuffer = buffer) - { - DryadLinqUtil.memcpy(dataBlock, pBuffer, numBytesToWrite); - } - this.m_compressStream.Write(buffer, 0, numBytesToWrite); - } - return true; - } - - internal override void Flush() - { - if (this.m_compressStream != null) - { - this.m_compressStream.Flush(); - } - this.m_dscStream.Flush(); - } - - internal override void Close() - { - if (!this.m_isClosed) - { - this.m_isClosed = true; - if (this.m_compressStream != null) - { - this.m_compressStream.Close(); - } - this.m_dscStream.Close(); - this.m_compressStream = null; - this.m_dscStream = null; - } - } - - internal override unsafe DataBlockInfo AllocateDataBlock(Int32 size) - { - DataBlockInfo blockInfo; - blockInfo.ItemHandle = Marshal.AllocHGlobal((IntPtr)size); - blockInfo.DataBlock = (byte*)blockInfo.ItemHandle; - blockInfo.BlockSize = size; - return blockInfo; - } - - internal override unsafe void ReleaseDataBlock(IntPtr itemHandle) - { - if (itemHandle != IntPtr.Zero) - { - Marshal.FreeHGlobal(itemHandle); - } - } - } -} diff --git a/LinqToDryad/DscStubs.cs b/LinqToDryad/DscStubs.cs deleted file mode 100644 index 66cc472..0000000 --- a/LinqToDryad/DscStubs.cs +++ /dev/null @@ -1,170 +0,0 @@ -/* -Copyright (c) Microsoft Corporation - -All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License -at http://www.apache.org/licenses/LICENSE-2.0 - - -THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER -EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF -TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. - - -See the Apache Version 2.0 License for specific language governing permissions and -limitations under the License. 
- -*/ - -//-------------------------------------------------------------------------- -// -// -// Fileset compression modes supported by DSC. -// -//-------------------------------------------------------------------------- - -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Microsoft.Research.DryadLinq -{ - //YARN - public enum CompressionScheme - { - None, - Gzip - } - - public class DscService - { - private string m_headNode; - public DscService(string headNode) - { - m_headNode = headNode; - } - - internal DscFileSet GetFileSet(string streamName) - { - throw new NotImplementedException(); - } - - internal bool FileSetExists(string dscFileSetName) - { - throw new NotImplementedException(); - } - - internal void DeleteFileSet(string dscFileSetName) - { - throw new NotImplementedException(); - } - - internal DscFileSet CreateFileSet(string streamName, CompressionScheme compressionScheme) - { - throw new NotImplementedException(); - } - - internal void Close() - { - throw new NotImplementedException(); - } - - public string HostName { get; set; } - } - - public class DscFileSet - { - internal DscFile AddNewFile(int p) - { - throw new NotImplementedException(); - } - - public CompressionScheme CompressionScheme { get; set; } - - internal byte[] GetMetadata(string p) - { - throw new NotImplementedException(); - } - - internal void Seal() - { - throw new NotImplementedException(); - } - - internal IEnumerable GetFiles() - { - throw new NotImplementedException(); - } - - internal bool IsSealed() - { - throw new NotImplementedException(); - } - - internal void SetLeaseEndTime(DateTime dateTime) - { - throw new NotImplementedException(); - } - - internal void SetMetadata(string p1, byte[] p2) - { - throw new NotImplementedException(); - } - } - - public class DscFile - { - public string[] ReadPaths { get; set; } - public string WritePath { get; set; } - } - - internal class DscInstance: IDisposable - { - public DscInstance(Uri uri) - { - 
throw new NotImplementedException(); - } - - internal DscStream GetStream(Uri uri) - { - throw new NotImplementedException(); - } - - public void Dispose() - { - throw new NotImplementedException(); - } - } - - internal class DscStream - { - public long Length { get; set; } - - public int PartitionCount { get; set; } - } - - public class DscException : Exception - { - - } - - public interface IScheduler : IDisposable - { - void Connect(string headNode); - - IServerVersion GetServerVersion(); - } - - public interface IServerVersion - { - int Major { get; set; } - - int Minor { get; set; } - - int Build { get; set; } - - int Revision { get; set; } - } -} diff --git a/LinqToDryad/ForkTuple.cs b/LinqToDryad/ForkTuple.cs index 1244360..5b1a09f 100644 --- a/LinqToDryad/ForkTuple.cs +++ b/LinqToDryad/ForkTuple.cs @@ -35,18 +35,29 @@ namespace Microsoft.Research.DryadLinq object Value { get; } } + /// + /// Represents an element that may not have valid value. + /// + /// The element type [Serializable] - public struct ForkValue : IForkValue + public struct ForkValue : IForkValue, IEquatable> { private T m_x; private bool m_hasX; + /// + /// Initializes an instance of ForkValue{T}. Its element has valid value. + /// + /// The value of the element public ForkValue(T x) { this.m_x = x; this.m_hasX = true; } + /// + /// true iff the value of the element is valid. + /// public bool HasValue { get { return this.m_hasX; } @@ -57,21 +68,61 @@ namespace Microsoft.Research.DryadLinq get { return this.m_x; } } + /// + /// Gets the value of the element. It is valid only if HasValue returns true. + /// public T Value { get { return this.m_x; } set { this.m_x = value; this.m_hasX = true; } } + + /// + /// Gets the hash code of this instance of ForkValue. 
+ /// + /// An integer hash code + public override int GetHashCode() + { + if (!this.HasValue) + { + return 0; + } + return this.Value.GetHashCode(); + } + + /// + /// Determines whether the current ForkValue is equal to a specified ForkValue + /// + /// A specified ForkValue + /// true iff the current ForkValue is equal to the argument + public bool Equals(ForkValue fval) + { + if (!this.HasValue) + { + return !fval.HasValue; + } + return fval.HasValue && this.Value.Equals(fval.Value); + } } + /// + /// Represents a pair of elements that may not have valid values. + /// + /// The type of the first element + /// The type of the second element [Serializable] - public struct ForkTuple + public struct ForkTuple : IEquatable> { private T1 m_x; private T2 m_y; private bool m_hasX; private bool m_hasY; + /// + /// Initializes an instance of ForkTuple of two elements. Both elements have valid values. + /// + /// The first element + /// The second element public ForkTuple(T1 x, T2 y) { this.m_x = x; @@ -80,31 +131,99 @@ namespace Microsoft.Research.DryadLinq this.m_hasY = true; } + /// + /// true iff the value of the first element is valid. + /// public bool HasFirst { get { return this.m_hasX; } } + /// + /// true iff the value of the second element is valid. + /// public bool HasSecond { get { return this.m_hasY; } } + /// + /// Gets and sets the first element. + /// public T1 First { get { return this.m_x; } set { this.m_x = value; this.m_hasX = true; } } + /// + /// Gets and sets the second element. + /// public T2 Second { get { return this.m_y; } set { this.m_y = value; this.m_hasY = true; } } + + /// + /// Gets the hash code of this instance of ForkTuple.
+ /// + /// An integer hash code + public override int GetHashCode() + { + int hashCode = 0; + if (this.HasFirst) + { + hashCode = (-1521134295 * hashCode) + this.First.GetHashCode(); + } + if (this.HasSecond) + { + hashCode = (-1521134295 * hashCode) + this.Second.GetHashCode(); + } + return hashCode; + } + + /// + /// Determines whether the current ForkTuple is equal to a specified ForkTuple. + /// + /// A specified ForkTuple + /// true iff the current ForkTuple is equal to the argument + public bool Equals(ForkTuple fval) + { + if (this.HasFirst) + { + if (!fval.HasFirst || !this.First.Equals(fval.First)) + { + return false; + } + } + else + { + if (fval.HasFirst) return false; + } + if (this.HasSecond) + { + if (!fval.HasSecond || !this.Second.Equals(fval.Second)) + { + return false; + } + } + else + { + if (fval.HasSecond) return false; + } + return true; + } } + /// + /// Represents a tuple of three elements that may not have valid values. + /// + /// The type of the first element + /// The type of the second element + /// The type of the third element [Serializable] - public struct ForkTuple + public struct ForkTuple : IEquatable> { private T1 m_x; private T2 m_y; @@ -113,6 +232,12 @@ namespace Microsoft.Research.DryadLinq private bool m_hasY; private bool m_hasZ; + /// + /// Initializes an instance of ForkTuple of three elements. All the elements have valid values. + /// + /// The first element + /// The second element + /// The third element public ForkTuple(T1 x, T2 y, T3 z) { this.m_x = x; @@ -123,57 +248,120 @@ namespace Microsoft.Research.DryadLinq this.m_hasZ = true; } + /// + /// true iff the value of the first element is valid. + /// public bool HasFirst { get { return this.m_hasX; } } + /// + /// true iff the value of the second element is valid. + /// public bool HasSecond { get { return this.m_hasY; } } + /// + /// true iff the value of the third element is valid. 
+ /// public bool HasThird { get { return this.m_hasZ; } } + /// + /// Gets and sets the first element. + /// public T1 First { get { return this.m_x; } set { this.m_x = value; this.m_hasX = true; } } + /// + /// Gets and sets the second element. + /// public T2 Second { get { return this.m_y; } set { this.m_y = value; this.m_hasY = true; } } + /// + /// Gets and sets the third element. + /// public T3 Third { get { return this.m_z; } set { this.m_z = value; this.m_hasZ = true; } } - } - - internal struct ForkTuple - { - private IForkValue[] m_values; - public ForkTuple(params IForkValue[] values) + /// + /// Gets the hash code of this instance of ForkTuple. + /// + /// An integer hash code + public override int GetHashCode() { - this.m_values = values; - } - - public bool HasValue(int index) - { - return this.m_values[index].HasValue; + int hashCode = 0; + if (this.HasFirst) + { + hashCode = (-1521134295 * hashCode) + this.First.GetHashCode(); + } + if (this.HasSecond) + { + hashCode = (-1521134295 * hashCode) + this.Second.GetHashCode(); + } + if (this.HasThird) + { + hashCode = (-1521134295 * hashCode) + this.Third.GetHashCode(); + } + return hashCode; } - public object Value(int index) + /// + /// Determines whether the current ForkTuple is equal to a specified ForkTuple. 
+ /// + /// A specified ForkTuple + /// true iff the current ForkTuple is equal to the argument + public bool Equals(ForkTuple fval) { - return this.m_values[index].Value; + if (this.HasFirst) + { + if (!fval.HasFirst || !this.First.Equals(fval.First)) + { + return false; + } + } + else + { + if (fval.HasFirst) return false; + } + if (this.HasSecond) + { + if (!fval.HasSecond || !this.Second.Equals(fval.Second)) + { + return false; + } + } + else + { + if (fval.HasSecond) return false; + } + if (this.HasThird) + { + if (!fval.HasThird || !this.Third.Equals(fval.Third)) + { + return false; + } + } + else + { + if (fval.HasThird) return false; + } + return true; } } } diff --git a/LinqToDryad/IAssociative.cs b/LinqToDryad/IAssociative.cs index 62b14c1..f52f1a7 100644 --- a/LinqToDryad/IAssociative.cs +++ b/LinqToDryad/IAssociative.cs @@ -25,22 +25,53 @@ using System.Text; namespace Microsoft.Research.DryadLinq { + /// + /// Defines the DryadLINQ interface for associative accumulator. + /// + /// The type of the accumulated value. public interface IAssociative { + /// + /// Provides the initial value for the accumulator. + /// + /// The initial value of the accumulator TAccumulate Seed(); + + /// + /// Combines two accumulator values into one. + /// + /// The value of the accumulator + /// A value to be accumulated + /// The result of combining two accumulator values into one TAccumulate RecursiveAccumulate(TAccumulate acc, TAccumulate val); } + /// + /// A helper class for calling IAssociative methods more efficiently. It is used in + /// auto-generated vertex code. A DryadLINQ user should not need to use this class directly. + /// + /// The type that implements the IAssociative{T} interface + /// The type of the accumulator value. public static class GenericAssociative where TAssoc : IAssociative, new() { private static TAssoc a = new TAssoc(); + /// + /// Provides the initial value for the accumulator. 
+ /// + /// The initial value of the accumulator public static TAccumulate Seed() { return a.Seed(); } + /// + /// Combines two accumulator values into one. + /// + /// The value of the accumulator + /// A value to be accumulated + /// The result of combining two accumulator values into one public static TAccumulate RecursiveAccumulate(TAccumulate acc, TAccumulate val) { return a.RecursiveAccumulate(acc, val); diff --git a/LinqToDryad/IDecomposable.cs b/LinqToDryad/IDecomposable.cs index 2500953..cf14e7c 100644 --- a/LinqToDryad/IDecomposable.cs +++ b/LinqToDryad/IDecomposable.cs @@ -25,40 +25,112 @@ using System.Text; namespace Microsoft.Research.DryadLinq { + /// + /// Defines the DryadLINQ interface for decomposable functions. It allows a function to be + /// decomposed into the composition of several functions that can be executed more efficiently. + /// + /// The record type of the original input. + /// The record type of an intermediate result. + /// The record type of the final result. public interface IDecomposable { + /// + /// Initializes the state of this IDecomposable object. + /// + /// The state. void Initialize(object state); + + /// + /// Converts an input record to an intermediate value. + /// + /// An input record. + /// An intermediate result. TAccumulate Seed(TSource val); + + /// + /// Adds a new input record into the intermediate value. + /// + /// The current intermediate value. + /// A new input record. + /// The new intermediate value. TAccumulate Accumulate(TAccumulate acc, TSource val); + + /// + /// Combines two intermediate values into a new intermediate value. + /// + /// The first intermediate value. + /// The second intermediate value. + /// The new intermediate value. TAccumulate RecursiveAccumulate(TAccumulate acc, TAccumulate val); + + /// + /// Computes the final result from the current intermediate value. + /// + /// An intermediate value. + /// The final result. 
TResult FinalReduce(TAccumulate val); } + /// + /// A helper class for calling IDecomposable methods more efficiently. It is used in + /// auto-generated vertex code. A DryadLINQ user should not need to use this class directly. + /// + /// The type that implements the IDecomposable interface + /// The element type of the input sequence + /// The element type of an intermediate result + /// The element type of the final result public static class GenericDecomposable where TDecomposable : IDecomposable, new() { private static TDecomposable d = new TDecomposable(); + /// + /// Initializes the initial state of the IDecomposable object. + /// + /// The initial state of this IDecomposable object public static void Initialize(object state) { d.Initialize(state); } + /// + /// Converts an input element to an intermediate accumulator value. + /// + /// An input element + /// An accumulator value public static TAccumulate Seed(TSource val) { return d.Seed(val); } + /// + /// Accumulates an input element into the accumulator value. + /// + /// An accumulator value + /// An input element + /// An accumulator value resulting from applying this Accumulate method + /// on the two arguments public static TAccumulate Accumulate(TAccumulate acc, TSource val) { return d.Accumulate(acc, val); } + /// + /// Combines two accumulator values into one. + /// + /// The first accumulator value + /// The second accumulator value + /// An accumulator value resulting from combining two accumulator values public static TAccumulate RecursiveAccumulate(TAccumulate acc, TAccumulate val) { return d.RecursiveAccumulate(acc, val); } + /// + /// Produces the final value from an accumulator value. 
+ /// + /// An accumulator value + /// The value of the final result public static TResult FinalReduce(TAccumulate val) { return d.FinalReduce(val); diff --git a/LinqToDryad/LineRecord.cs b/LinqToDryad/LineRecord.cs index 7f26c49..4041c3e 100644 --- a/LinqToDryad/LineRecord.cs +++ b/LinqToDryad/LineRecord.cs @@ -27,63 +27,111 @@ using System.Diagnostics; namespace Microsoft.Research.DryadLinq { - // We override Equals and GetHashCode for better performance. + /// + /// The DryadLINQ type to represent a line of text. + /// [Serializable] public struct LineRecord : IComparable, IComparable, IEnumerable, IEnumerable, IEquatable { private string _line; + + /// + /// Initializes a new instance of LineRecord from a string. + /// + /// The input string. + public LineRecord(string line) + { + _line = line; + } + + /// + /// Gets the string value of a LineRecord. + /// public string Line { get { return _line; } internal set { _line = value; } } - public LineRecord(string line) - { - _line = line; - } - + /// + /// Determines whether the specified object is equal to the current LineRecord. + /// + /// The object to compare with. + /// true iff the specified object is equal to the current object. public override bool Equals(Object obj) { if (!(obj is LineRecord)) return false; return this.Line.Equals(((LineRecord)obj).Line); } + /// + /// Determines whether a specified LineRecord is equal to the current LineRecord. + /// + /// A LineRecord to compare with. + /// true iff the argument is equal to the current LineRecord. public bool Equals(LineRecord val) { return this.Line.Equals(val.Line); } - public static bool Equals(LineRecord a, LineRecord b) - { - return a.Equals(b); - } - + /// + /// Determines whether two specified LineRecords are equal. + /// + /// The left LineRecord. + /// The right LineRecord. + /// true iff two LineRecords are equal. 
public static bool operator ==(LineRecord a, LineRecord b) { return a.Equals(b); } + /// + /// Determines whether two specified LineRecords are not equal. + /// + /// The left LineRecord. + /// The right LineRecord. + /// true iff two LineRecords are not equal. public static bool operator !=(LineRecord a, LineRecord b) { return !a.Equals(b); } + /// + /// Returns true iff a LineRecord is less than another LineRecord. + /// + /// The left LineRecord. + /// The right LineRecord. + /// true iff left is less than right. public static bool operator <(LineRecord a, LineRecord b) { return a.CompareTo(b) < 0; } + /// + /// Returns true iff a LineRecord is greater than another LineRecord. + /// + /// The left LineRecord. + /// The right LineRecord. + /// true iff left is greater than right. public static bool operator >(LineRecord a, LineRecord b) { return a.CompareTo(b) > 0; } + /// + /// Returns the hash code of the current LineRecord. + /// + /// A 32-bit signed integer. public override int GetHashCode() { return this.Line.GetHashCode(); } + /// + /// Compares the current LineRecord with an object. + /// + /// The value to compare. + /// An integer that indicates the order. public int CompareTo(Object val) { if (val == null) return 1; @@ -95,7 +143,12 @@ namespace Microsoft.Research.DryadLinq return StringComparer.Ordinal.Compare(this.Line, ((LineRecord)val).Line); } - + + /// + /// Compares the current LineRecord with another LineRecord. + /// + /// The LineRecord to compare with. + /// An integer that indicates the order. public int CompareTo(LineRecord val) { return StringComparer.Ordinal.Compare(this.Line, val.Line); @@ -111,6 +164,10 @@ namespace Microsoft.Research.DryadLinq return this.Line.GetEnumerator(); } + /// + /// Returns a string that represents the current LineRecord. + /// + /// A string that represents the current LineRecord. 
public override String ToString() { return this.Line; diff --git a/LinqToDryad/LinqToDryad.csproj b/LinqToDryad/LinqToDryad.csproj index 82a92d0..47e645b 100644 --- a/LinqToDryad/LinqToDryad.csproj +++ b/LinqToDryad/LinqToDryad.csproj @@ -1,6 +1,6 @@ - + - + Debug AnyCPU @@ -43,6 +43,7 @@ prompt MinimumRecommendedRules.ruleset false + ..\bin\Release\Microsoft.Research.DryadLinq.XML @@ -63,15 +64,39 @@ False - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll - + False - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll + + + False + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.dll + + + False + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll + + False + ..\packages\Microsoft.WindowsAzure.Management.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.dll + False ..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll @@ -84,6 +109,10 @@ False ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll + + False + 
..\packages\Microsoft.WindowsAzure.Management.Storage.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll + False ..\packages\WindowsAzure.Storage.3.1.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll @@ -97,6 +126,17 @@ + + + + False + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Extensions.dll + + + False + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Primitives.dll + + @@ -139,7 +179,6 @@ - @@ -164,7 +203,6 @@ - @@ -176,6 +214,7 @@ + @@ -187,11 +226,8 @@ - - - - + ResXFileCodeGenerator @@ -204,14 +240,19 @@ + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - + + - + - + \ No newline at end of file diff --git a/LinqToDryad/LocalJobSubmission.cs b/LinqToDryad/LocalJobSubmission.cs index 08a48f7..e3c9199 100644 --- a/LinqToDryad/LocalJobSubmission.cs +++ b/LinqToDryad/LocalJobSubmission.cs @@ -103,16 +103,16 @@ namespace Microsoft.Research.DryadLinq var vertexPath = Path.Combine(Context.DryadHomeDirectory, "VertexHost.exe"); string[] jmArgs = { "--dfs=" + logDirParam, vertexPath, queryPlanFile }; return ConfigHelpers.MakeProcessGroup( - "jm", "local", 1, 1, true, - jmPath, jmArgs, null, "graphmanager-stdout.txt", "graphmanager-stderr.txt", - resources, environment); + "jm", "local", 1, 1, true, + jmPath, jmArgs, null, "graphmanager-stdout.txt", "graphmanager-stderr.txt", + resources, environment); } protected override XElement MakeWorkerConfig(string configPath, XElement peloponneseResource) { // add job-local resources to each worker directory, leaving out the standard Dryad files var resources = new List(); - foreach (var rg in LocalResources.Where(rg => rg.Key != Context.DryadHomeDirectory)) + foreach (var rg in LocalResources) { resources.Add(MakeResourceGroup(rg.Key, rg.Value)); } @@ -124,9 +124,9 @@ namespace 
Microsoft.Research.DryadLinq var psPath = Path.Combine(Context.DryadHomeDirectory, "ProcessService.exe"); string[] psArgs = { configPath }; return ConfigHelpers.MakeProcessGroup( - "Worker", "local", 2, numWorkerProcesses, false, - psPath, psArgs, null, "processservice-stdout.txt", "processservice-stderr.txt", - resources, null); + "Worker", "local", 2, numWorkerProcesses, false, + psPath, psArgs, null, "processservice-stdout.txt", "processservice-stderr.txt", + resources, null); } private string MakeProcessServiceConfig() @@ -185,7 +185,7 @@ namespace Microsoft.Research.DryadLinq private void CreateDirectory() { - m_workingDirectory = null; + this.m_workingDirectory = null; string wdBase = Path.Combine(Context.DryadHomeDirectory, "LocalJobs"); if (!Directory.Exists(wdBase)) @@ -217,15 +217,12 @@ namespace Microsoft.Research.DryadLinq } catch (Exception e) { - lock (this) { m_error = "Failed to create local job directory " + wd + ": " + e.ToString(); m_status = JobStatus.Failure; } - Console.WriteLine(m_error); - return; } @@ -242,13 +239,11 @@ namespace Microsoft.Research.DryadLinq m_error = "Failed to create local log directory " + logD + ": " + e.ToString(); m_status = JobStatus.Failure; } - Console.WriteLine(m_error); - return; } - m_workingDirectory = wd; + this.m_workingDirectory = wd; } void OnJobExited(Object obj, EventArgs args) @@ -287,6 +282,19 @@ namespace Microsoft.Research.DryadLinq public override void SubmitJob() { + if (Context.PeloponneseHomeDirectory == null) + { + throw new ApplicationException("No Peloponnese home directory is set"); + } + if (Context.DryadHomeDirectory == null) + { + throw new ApplicationException("No Dryad home directory is set"); + } + if (!IsValidDryadDirectory(Context.DryadHomeDirectory)) + { + throw new ApplicationException("Dryad home directory " + Context.DryadHomeDirectory + " is missing some required files"); + } + CreateDirectory(); if (m_workingDirectory == null) { diff --git a/LinqToDryad/MultiEnumerable.cs 
b/LinqToDryad/MultiEnumerable.cs index f94999c..e7a17ed 100644 --- a/LinqToDryad/MultiEnumerable.cs +++ b/LinqToDryad/MultiEnumerable.cs @@ -27,6 +27,8 @@ using System.Reflection; using System.Linq; using Microsoft.Research.DryadLinq; +#pragma warning disable 1591 + namespace Microsoft.Research.DryadLinq.Internal { public interface IMultiEnumerable diff --git a/LinqToDryad/MultiQueryable.cs b/LinqToDryad/MultiQueryable.cs index 6f0fb4b..4e6ce43 100644 --- a/LinqToDryad/MultiQueryable.cs +++ b/LinqToDryad/MultiQueryable.cs @@ -31,34 +31,99 @@ using Microsoft.Research.DryadLinq.Internal; namespace Microsoft.Research.DryadLinq { + /// + /// The base interface to access a collection of IQueryable instances. The + /// DryadLINQ Fork operator returns a value that implements this interface. + /// public interface IMultiQueryable { + /// + /// Gets the element type of the query at a specified index. + /// + /// The index + /// A Type that represents the type of the elements Type ElementType(int index); + + /// + /// Gets the expression tree that is associated with this instance of IMultiQueryable + /// Expression Expression { get; } + + /// + /// Gets the query provider that is associated with this instance of IMultiQueryable + /// IQueryProvider Provider { get; } + + /// + /// Gets the number of queries in this instance of IMultiQueryable + /// UInt32 NumberOfInputs { get; } } - public interface IKeyedMultiQueryable : IMultiQueryable - { - IQueryable this[K key] { get; } - K[] Keys { get; } - } - + /// + /// The interface to access a collection of two IQueryable{T} instances. + /// + /// The element type of the first IQueryable{T} + /// The element type of the second IQueryable{T} public interface IMultiQueryable : IMultiQueryable { + /// + /// Gets the first IQueryable{T} + /// IQueryable First { get; } + + /// + /// Gets the second IQueryable{T} + /// IQueryable Second { get; } } + /// + /// The interface to access a collection of three IQueryable{T} instances. 
+ /// + /// The element type of the first IQueryable{T} + /// The element type of the second IQueryable{T} + /// The element type of the third IQueryable{T} public interface IMultiQueryable : IMultiQueryable { + /// + /// Gets the first IQueryable{T} + /// IQueryable First { get; } - IQueryable Second { get; } + + /// + /// Gets the second IQueryable{T} + /// + IQueryable Second { get; } + + /// + /// Gets the third IQueryable{T} + /// IQueryable Third { get; } } - public class MultiQueryable : IKeyedMultiQueryable + /// + /// The interface to access a collection of IQueryable{T} instances. Each IQueryable{T} + /// contains only elements of the same key. The IQueryable{T}s are indexed by a set of keys. + /// + /// The element type of IQueryable{T}s + /// The key type + public interface IKeyedMultiQueryable : IMultiQueryable + { + /// + /// Gets the IQueryable{T} associated with a specified key. + /// + /// A key + /// The IQueryable{T} associated with the key + IQueryable this[K key] { get; } + + /// + /// Gets the keys. + /// + K[] Keys { get; } + } + + internal class MultiQueryable : IKeyedMultiQueryable { private IQueryable m_source; private Expression m_queryExpression; @@ -141,14 +206,13 @@ namespace Microsoft.Research.DryadLinq return this.ForkChoose(index); } - //@@TODO: throw ArgumentOutOfRangeException? throw new DryadLinqException(DryadLinqErrorCode.MultiQueryableKeyOutOfRange, SR.MultiQueryableKeyOutOfRange); } } } - public class MultiQueryable : IMultiQueryable + internal class MultiQueryable : IMultiQueryable { private IQueryable m_source; private Expression m_queryExpression; @@ -180,7 +244,6 @@ namespace Microsoft.Research.DryadLinq } else { - //@@TODO: throw ArgumentOutOfRangeException? 
throw new DryadLinqException(DryadLinqErrorCode.IndexOutOfRange, SR.IndexOutOfRange); } @@ -228,7 +291,7 @@ namespace Microsoft.Research.DryadLinq } } - public class MultiQueryable : IMultiQueryable + internal class MultiQueryable : IMultiQueryable { private IQueryable m_source; private Expression m_queryExpression; @@ -264,7 +327,6 @@ namespace Microsoft.Research.DryadLinq } else { - //@@TODO: throw ArgumentOutOfRangeException? throw new DryadLinqException(DryadLinqErrorCode.IndexOutOfRange, SR.IndexOutOfRange); } diff --git a/LinqToDryad/NativeBlockStream.cs b/LinqToDryad/NativeBlockStream.cs index f794103..b32eb76 100644 --- a/LinqToDryad/NativeBlockStream.cs +++ b/LinqToDryad/NativeBlockStream.cs @@ -38,9 +38,10 @@ namespace Microsoft.Research.DryadLinq.Internal internal IntPtr ItemHandle; } - // this type is public on the outside but all its members are marked internal - // because generated vertex code needs to pass around references to it but - // doesn't call any methods, nor should client code. + /// + /// Represents the abstraction of a native stream. This NativeBlockStream class is public + /// because auto-generated vertex code needs to pass around references to it. 
+ /// public abstract class NativeBlockStream { internal abstract Int64 GetTotalLength(); diff --git a/LinqToDryad/PeloponneseJobSubmission.cs b/LinqToDryad/PeloponneseJobSubmission.cs index c813c31..3c3cddc 100644 --- a/LinqToDryad/PeloponneseJobSubmission.cs +++ b/LinqToDryad/PeloponneseJobSubmission.cs @@ -21,18 +21,14 @@ using System; using System.Collections.Generic; using System.Linq; using System.Text; +using System.Diagnostics; +using System.IO; +using System.Net; +using System.Xml.Linq; +using Microsoft.Research.DryadLinq.Internal; namespace Microsoft.Research.DryadLinq { - using System; - using System.Diagnostics; - using System.IO; - using System.Net; - using System.Xml.Linq; - using System.Linq; - using System.Text; - using Microsoft.Research.DryadLinq.Internal; - abstract class PeloponneseJobSubmission : IDryadLinqJobSubmission { private DryadLinqContext m_context; @@ -50,12 +46,50 @@ namespace Microsoft.Research.DryadLinq abstract public JobStatus GetStatus(); abstract public JobStatus TerminateJob(); + static private string[] sharedDryadFiles = + { + "DryadLinqGraphManager.exe", + "DryadLinqGraphManager.exe.config", + "Microsoft.Research.Dryad.dll", + "DryadHttpClusterInterface.dll", + "DryadLocalScheduler.dll", + + "ProcessService.exe", + "ProcessService.pdb", + "VertexHost.exe", + "VertexHost.pdb", + "VertexHost.exe.config", + "Microsoft.Research.DryadLinq.dll", + "Microsoft.Research.DryadLinq.pdb", + "DryadLinqNativeChannels.dll", + "DryadLinqNativeChannels.pdb", + "DryadManagedChannel.dll", + "DryadManagedChannel.pdb" + }; + public PeloponneseJobSubmission(DryadLinqContext context) { m_context = context; m_localResources = new Dictionary>(); } + static protected bool IsValidDryadDirectory(string directory) + { + IEnumerable filesPresent = + Directory.EnumerateFiles(directory, "*", SearchOption.TopDirectoryOnly) + .Select(x => Path.GetFileName(x).ToLower()); + + IEnumerable filesNeeded = sharedDryadFiles.Select(x => x.ToLower()); + + return 
(filesPresent.Intersect(filesNeeded).Count() == sharedDryadFiles.Length); + } + + static private bool IsSharedDryadFile(string fileName) + { + IEnumerable sharedFiles = sharedDryadFiles.Select(x => x.ToLower()); + return sharedFiles.Contains(fileName.ToLower()); + } + public void AddJobOption(string fieldName, string fieldVal) { if (fieldName == "cmdline") @@ -72,7 +106,7 @@ namespace Microsoft.Research.DryadLinq var fileName = Path.GetFileName(pathName); var directory = Path.GetDirectoryName(pathName); - if (directory == Context.DryadHomeDirectory) + if (directory == Context.DryadHomeDirectory && IsSharedDryadFile(fileName)) { // we deal with these resources elsewhere return; diff --git a/LinqToDryad/Properties/AssemblyInfo.cs b/LinqToDryad/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..5740619 --- /dev/null +++ b/LinqToDryad/Properties/AssemblyInfo.cs @@ -0,0 +1,33 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("Microsoft.Research.DryadLinq")] +[assembly: AssemblyDescription("DryadLinq client assembly")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("Microsoft")] +[assembly: AssemblyProduct("Microsoft.Research.DryadLinq.Properties")] +[assembly: AssemblyCopyright("Copyright © 2014")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. 
+[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("ca8db7f2-4be1-4000-86a9-fae808fea322")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +[assembly: AssemblyVersion("0.1.1.0")] +[assembly: AssemblyFileVersion("0.1.1.0")] diff --git a/LinqToDryad/QueryTraceLevel.cs b/LinqToDryad/QueryTraceLevel.cs index 84442ba..2d13497 100644 --- a/LinqToDryad/QueryTraceLevel.cs +++ b/LinqToDryad/QueryTraceLevel.cs @@ -23,16 +23,38 @@ using System; namespace Microsoft.Research.DryadLinq { /// - /// Trace levels for DryadLinqQuery runtime + /// Trace levels for DryadLinqQuery runtime. /// public enum QueryTraceLevel : int { - // Use internal constants since public type cannot be easily shared acrossed DLLs w/o resulting in ambiquities + /// + /// This level turns off DryadLINQ logging completely. + /// Off = Constants.TraceOffLevel, + + /// + /// This level only logs critical log entries. + /// Critical = Constants.TraceCriticalLevel, + + /// + /// This level logs error or critical log entries. + /// Error = Constants.TraceErrorLevel, + + /// + /// This level logs warning or more critical log entries. + /// Warning = Constants.TraceWarningLevel, + + /// + /// This level logs information or more critical log entries. + /// Information = Constants.TraceInfoLevel, + + /// + /// This level logs all DryadLINQ log entries. + /// Verbose = Constants.TraceVerboseLevel } } diff --git a/LinqToDryad/VertexEnv.cs b/LinqToDryad/VertexEnv.cs index 33ba23f..e70aa01 100644 --- a/LinqToDryad/VertexEnv.cs +++ b/LinqToDryad/VertexEnv.cs @@ -29,8 +29,10 @@ using Microsoft.Research.DryadLinq; namespace Microsoft.Research.DryadLinq.Internal { - // The class encapsulates the external environment in which a - // managed query operator executes. + /// + /// Exposes the execution environment for managed vertex code. 
+ /// + /// A DryadLINQ user should not need to use this class directly. public class VertexEnv { private const string VERTEX_EXCEPTION_FILENAME = @"VertexException.txt"; @@ -46,6 +48,11 @@ namespace Microsoft.Research.DryadLinq.Internal private bool m_useLargeBuffer; private bool m_multiThreading; + /// + /// Initializes an instance of VertexEnv. This is called in auto-generated code. + /// + /// + /// public VertexEnv(string args, DryadLinqVertexParams vertexParams) { this.m_argList = args.Split('|'); @@ -67,6 +74,9 @@ namespace Microsoft.Research.DryadLinq.Internal Debug.Assert(vertexParams.OutputArity <= this.m_numberOfOutputs); } + /// + /// Determines whether to run the DryadLINQ local vertex runtime in multi-threaded mode. + /// public bool MultiThreading { get { return m_multiThreading; } @@ -78,21 +88,35 @@ namespace Microsoft.Research.DryadLinq.Internal get { return this.m_nativeHandle; } } + /// + /// The number of inputs of the vertex. + /// public UInt32 NumberOfInputs { get { return this.m_numberOfInputs; } } + /// + /// The number of outputs of the vertex. + /// public UInt32 NumberOfOutputs { get { return this.m_numberOfOutputs; } } + /// + /// The number of command-line arguments of the vertex. + /// public Int32 NumberOfArguments { get { return this.m_argList.Length; } } + /// + /// Gets the argument at the specified index. + /// + /// + /// public string GetArgument(Int32 idx) { return this.m_argList[idx]; @@ -103,6 +127,9 @@ namespace Microsoft.Research.DryadLinq.Internal get { return this.m_useLargeBuffer; } } + /// + /// Gets the vertex id. + /// public Int64 VertexId { get { @@ -110,6 +137,12 @@ namespace Microsoft.Research.DryadLinq.Internal } } + /// + /// Makes a reader for the current input. + /// + /// The record type of the input. + /// The reader factory. + /// A reader for the current input. 
public DryadLinqVertexReader MakeReader(DryadLinqFactory readerFactory) { bool keepPortOrder = this.m_vertexParams.KeepInputPortOrder(this.m_nextInput); @@ -120,6 +153,12 @@ namespace Microsoft.Research.DryadLinq.Internal return new DryadLinqVertexReader(this, readerFactory, startPort, endPort, keepPortOrder); } + /// + /// Make a writer for the current output. + /// + /// The record type of the output. + /// The writer factory. + /// A writer for the current output. public DryadLinqVertexWriter MakeWriter(DryadLinqFactory writerFactory) { if (this.m_nextOutputPort + 1 < this.m_vertexParams.OutputArity) @@ -135,21 +174,44 @@ namespace Microsoft.Research.DryadLinq.Internal } } + /// + /// Make a binary reader from a native stream. Used only by auto-generated code. + /// + /// A native stream + /// A binary reader public static DryadLinqBinaryReader MakeBinaryReader(NativeBlockStream nativeStream) { return new DryadLinqBinaryReader(nativeStream); } + /// + /// Make a binary reader from a native handle and a port number. Used only by auto-generated code. + /// + /// The native handle + /// The port number + /// A binary reader public static DryadLinqBinaryReader MakeBinaryReader(IntPtr handle, UInt32 port) { return new DryadLinqBinaryReader(handle, port); } + /// + /// Make a binary writer from a native stream. Used only by auto-generated code. + /// + /// A native stream + /// A binary writer public static DryadLinqBinaryWriter MakeBinaryWriter(NativeBlockStream nativeStream) { return new DryadLinqBinaryWriter(nativeStream); } + /// + /// Make a binary writer from a native handle and a port number. Used only by auto-generated code. 
+ /// + /// The native handle + /// The port number + /// A hint of the size of write buffer + /// A binary writer public static DryadLinqBinaryWriter MakeBinaryWriter(IntPtr handle, UInt32 port, Int32 buffSize) { return new DryadLinqBinaryWriter(handle, port, buffSize); @@ -158,9 +220,12 @@ namespace Microsoft.Research.DryadLinq.Internal private static Exception s_lastReportedException; internal static int ErrorCode { get; set; } - // This method gets called by the generated vertex code, as well as VertexBridge - // to report exceptions. The exception will be dumped to "VertexException.txt" - // in the working directory. + /// + /// This method is called by the generated vertex code, as well as VertexBridge + /// to report exceptions. The exception will be dumped to "VertexException.txt" + /// in the working directory. + /// + /// The exception that triggers to call this method. public static void ReportVertexError(Exception e) { // We first need to check whether the same exception object was already diff --git a/LinqToDryad/YarnJobSubmission.cs b/LinqToDryad/YarnJobSubmission.cs index 942b795..bcd8891 100644 --- a/LinqToDryad/YarnJobSubmission.cs +++ b/LinqToDryad/YarnJobSubmission.cs @@ -45,19 +45,19 @@ namespace Microsoft.Research.DryadLinq private string DryadDfs { - get { return Context.DfsClient.Combine("staging", "dryad"); } + get { return Context.Cluster.DfsClient.Combine("staging", "dryad"); } } private string UserDfs { - get { return Context.DfsClient.Combine("user", Environment.UserName, "staging"); } + get { return Context.Cluster.DfsClient.Combine("user", Environment.UserName, "staging"); } } protected override XElement MakeJMConfig() { var qpPath = Path.Combine("..", "..", Path.GetFileName(QueryPlan)); var jmPath = Path.Combine("..", "..", "DryadLinqGraphManager.exe"); - string jobDirectoryTemplate = Context.ClusterClient.JobDirectoryTemplate.Replace("_BASELOCATION_", "dryad-jobs"); + string jobDirectoryTemplate = 
Context.Cluster.Client(Context).JobDirectoryTemplate.Replace("_BASELOCATION_", "dryad-jobs"); string logDirParam = Microsoft.Research.Peloponnese.Storage.AzureUtils.CmdLineEncode(jobDirectoryTemplate); string[] jmArgs = {"--dfs=" + logDirParam, "VertexHost.exe", qpPath }; // +" --break"; return ConfigHelpers.MakeProcessGroup( @@ -88,20 +88,13 @@ namespace Microsoft.Research.DryadLinq dryadFiles = dryadFiles.Select(x => Path.Combine(Context.DryadHomeDirectory, x)); - waiters.Add(ConfigHelpers.MakeResourceGroupAsync(Context.DfsClient, DryadDfs, true, dryadFiles)); + waiters.Add(ConfigHelpers.MakeResourceGroupAsync(Context.Cluster.DfsClient, DryadDfs, true, dryadFiles)); // add job-local resources to each worker directory, using public versions of the standard Dryad files foreach (var rg in LocalResources) { IEnumerable files = rg.Value.Select(x => Path.Combine(rg.Key, x)); - if (rg.Key == Context.DryadHomeDirectory) - { - waiters.Add(ConfigHelpers.MakeResourceGroupAsync(Context.DfsClient, DryadDfs, true, files)); - } - else - { - waiters.Add(ConfigHelpers.MakeResourceGroupAsync(Context.DfsClient, UserDfs, false, files)); - } + waiters.Add(ConfigHelpers.MakeResourceGroupAsync(Context.Cluster.DfsClient, UserDfs, false, files)); } try @@ -166,19 +159,19 @@ namespace Microsoft.Research.DryadLinq { IEnumerable dryadFiles = new[] { - "DryadLinqGraphManager.exe", - "DryadLinqGraphManager.exe.config", - "Microsoft.Research.Dryad.dll", - "DryadHttpClusterInterface.dll", - "DryadLocalScheduler.dll" + "DryadLinqGraphManager.exe", + "DryadLinqGraphManager.exe.config", + "Microsoft.Research.Dryad.dll", + "DryadHttpClusterInterface.dll", + "DryadLocalScheduler.dll" }; dryadFiles = dryadFiles.Select(x => Path.Combine(Context.DryadHomeDirectory, x)); - waiters.Add(ConfigHelpers.MakeResourceGroupAsync(Context.DfsClient, DryadDfs, true, dryadFiles)); + waiters.Add(ConfigHelpers.MakeResourceGroupAsync(Context.Cluster.DfsClient, DryadDfs, true, dryadFiles)); } IEnumerable userFiles = 
new[] { configPath, QueryPlan }; - waiters.Add(ConfigHelpers.MakeResourceGroupAsync(Context.DfsClient, UserDfs, false, userFiles)); + waiters.Add(ConfigHelpers.MakeResourceGroupAsync(Context.Cluster.DfsClient, UserDfs, false, userFiles)); try { @@ -196,15 +189,11 @@ namespace Microsoft.Research.DryadLinq resources.Add(t.Result); } - string appName; - if (Context.JobFriendlyName == null) + string appName = Context.JobFriendlyName; + if (String.IsNullOrEmpty(appName)) { appName = "DryadLINQ.App"; } - else - { - appName = Context.JobFriendlyName; - } return ConfigHelpers.MakeLauncherConfig(appName, Path.GetFileName(configPath), resources, JobDirectory); } @@ -212,7 +201,7 @@ namespace Microsoft.Research.DryadLinq private XDocument GenerateConfig() { XElement peloponneseResource = ConfigHelpers.MakePeloponneseResourceGroup( - Context.DfsClient, Context.PeloponneseHomeDirectory); + Context.Cluster.DfsClient, Context.PeloponneseHomeDirectory); string psConfigPath = MakeProcessServiceConfig(); @@ -231,14 +220,11 @@ namespace Microsoft.Research.DryadLinq { get { - if (m_job != null) - { - return m_job.ErrorMsg; - } - else + if (m_job == null) { return null; } + return m_job.ErrorMsg; } } @@ -275,15 +261,28 @@ namespace Microsoft.Research.DryadLinq } } - public string JobDirectory { get { return Context.ClusterClient.JobDirectoryTemplate.Replace("_BASELOCATION_", "dryad-jobs"); } } + public string JobDirectory { get { return Context.Cluster.Client(Context).JobDirectoryTemplate.Replace("_BASELOCATION_", "dryad-jobs"); } } public override void SubmitJob() { + if (Context.PeloponneseHomeDirectory == null) + { + throw new ApplicationException("No Peloponnese home directory is set"); + } + if (Context.DryadHomeDirectory == null) + { + throw new ApplicationException("No Dryad home directory is set"); + } + if (!IsValidDryadDirectory(Context.DryadHomeDirectory)) + { + throw new ApplicationException("Dryad home directory " + Context.DryadHomeDirectory + " is missing some required 
files"); + } + XDocument config = GenerateConfig(); try { - m_job = Context.ClusterClient.Submit(config, JobDirectory); + m_job = Context.Cluster.Client(Context).Submit(config, JobDirectory); } catch (Exception e) { @@ -308,10 +307,5 @@ namespace Microsoft.Research.DryadLinq return m_job.Id; } } - - internal void Initialize() - { - // nothing needed for now - } } } diff --git a/LinqToDryad/YarnScheduler.cs b/LinqToDryad/YarnScheduler.cs deleted file mode 100644 index 2324704..0000000 --- a/LinqToDryad/YarnScheduler.cs +++ /dev/null @@ -1,48 +0,0 @@ -/* -Copyright (c) Microsoft Corporation - -All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License -at http://www.apache.org/licenses/LICENSE-2.0 - - -THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER -EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF -TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. - - -See the Apache Version 2.0 License for specific language governing permissions and -limitations under the License. 
- -*/ - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; - -namespace Microsoft.Research.DryadLinq -{ - internal class YarnScheduler : IScheduler - { - public string m_headNode; - - public void Connect(string headNode) - { - this.m_headNode = headNode; - } - - public void Dispose() - { - this.m_headNode = null; - } - - public IServerVersion GetServerVersion() - { - throw new NotImplementedException(); - } - - } -} diff --git a/LinqToDryad/packages.config b/LinqToDryad/packages.config index 6cc3081..bd29d67 100644 --- a/LinqToDryad/packages.config +++ b/LinqToDryad/packages.config @@ -1,13 +1,21 @@ - + + + + - + + + + + + - + \ No newline at end of file diff --git a/LocalScheduler/LocalScheduler.csproj b/LocalScheduler/LocalScheduler.csproj index f56b2c7..a4c8fdf 100644 --- a/LocalScheduler/LocalScheduler.csproj +++ b/LocalScheduler/LocalScheduler.csproj @@ -1,6 +1,6 @@ - + - + Debug @@ -52,15 +52,39 @@ False - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll - + False - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll + + + False + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.dll + + + False + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll 
..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll + + False + ..\packages\Microsoft.WindowsAzure.Management.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.dll + False ..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll @@ -73,6 +97,10 @@ False ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll + + False + ..\packages\Microsoft.WindowsAzure.Management.Storage.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll + False ..\packages\WindowsAzure.Storage.3.1.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll @@ -84,6 +112,17 @@ + + + + False + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Extensions.dll + + + False + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Primitives.dll + + False ..\packages\System.Spatial.5.6.1\lib\net40\System.Spatial.dll @@ -114,14 +153,19 @@ + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
- - + + - + - + \ No newline at end of file diff --git a/LocalScheduler/Properties/AssemblyInfo.cs b/LocalScheduler/Properties/AssemblyInfo.cs index e546c94..e9c31ed 100644 --- a/LocalScheduler/Properties/AssemblyInfo.cs +++ b/LocalScheduler/Properties/AssemblyInfo.cs @@ -51,5 +51,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.1.0.0")] -[assembly: AssemblyFileVersion("0.1.0.0")] +[assembly: AssemblyVersion("0.1.2.0")] +[assembly: AssemblyFileVersion("0.1.2.0")] diff --git a/LocalScheduler/packages.config b/LocalScheduler/packages.config index 6cc3081..bd29d67 100644 --- a/LocalScheduler/packages.config +++ b/LocalScheduler/packages.config @@ -1,13 +1,21 @@ - + + + + - + + + + + + - + \ No newline at end of file diff --git a/Microsoft.Research.Dryad.nuspec b/Microsoft.Research.Dryad.nuspec index 55ebd05..23d3ad1 100644 --- a/Microsoft.Research.Dryad.nuspec +++ b/Microsoft.Research.Dryad.nuspec @@ -2,11 +2,11 @@ Microsoft.Research.Dryad - 0.7.0-beta022 + 0.1.2-beta Dryad and DryadLINQ for YARN bigdatadev@microsoft.com bigdatadev@microsoft.com,Microsoft - http://research.microsoft.com/en-us/projects/dryad/ + http://www.apache.org/licenses/LICENSE-2.0.html http://research.microsoft.com/en-us/projects/dryad/ true The goal of DryadLINQ is to make distributed computing on large compute cluster simple enough for every programmer. DryadLINQ combines two important pieces of Microsoft technology: the Dryad distributed execution engine and the .NET Language Integrated Query (LINQ). 
@@ -17,7 +17,7 @@ Dryad provides reliable, distributed computing on thousands of servers for large en-US - + @@ -36,28 +36,29 @@ Dryad provides reliable, distributed computing on thousands of servers for large - + + - + - + - + - + - - + + - + - + diff --git a/ProcessService/Cache.cs b/ProcessService/Cache.cs index e94fc7f..b227085 100644 --- a/ProcessService/Cache.cs +++ b/ProcessService/Cache.cs @@ -17,6 +17,8 @@ See the Apache Version 2.0 License for specific language governing permissions a limitations under the License. */ + +#if false using System; using System.Collections.Generic; using System.Linq; @@ -296,3 +298,4 @@ namespace Microsoft.Research.Dryad.ProcessService } } } +#endif \ No newline at end of file diff --git a/ProcessService/ProcessService.cs b/ProcessService/ProcessService.cs index 54a6fb7..8664904 100644 --- a/ProcessService/ProcessService.cs +++ b/ProcessService/ProcessService.cs @@ -129,7 +129,7 @@ namespace Microsoft.Research.Dryad.ProcessService private int id; private Dictionary mailbox; - private Dictionary inProgressFile; + //private Dictionary inProgressFile; private int exitCode; private ProcessStatus status; private Int64 startTime; diff --git a/ProcessService/ProcessService.csproj b/ProcessService/ProcessService.csproj index d2dfd2a..4b3731e 100644 --- a/ProcessService/ProcessService.csproj +++ b/ProcessService/ProcessService.csproj @@ -1,6 +1,6 @@ - + - + Debug @@ -53,15 +53,39 @@ False - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll - + False - ..\packages\Microsoft.Research.Peloponnese.0.7.1-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + ..\packages\Microsoft.Research.Peloponnese.0.7.2-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.dll + + + 
False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.166\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll + + + False + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.dll + + + False + ..\packages\Microsoft.WindowsAzure.Common.1.0.1\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll + + False + ..\packages\Microsoft.WindowsAzure.Management.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.dll + False ..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll @@ -74,6 +98,10 @@ False ..\packages\Microsoft.Hadoop.Client.1.1.0.7\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll + + False + ..\packages\Microsoft.WindowsAzure.Management.Storage.1.0.0\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll + False ..\packages\WindowsAzure.Storage.3.1.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll @@ -85,6 +113,17 @@ + + + + False + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Extensions.dll + + + False + ..\packages\Microsoft.Net.Http.2.2.19\lib\net45\System.Net.Http.Primitives.dll + + False ..\packages\System.Spatial.5.6.1\lib\net40\System.Spatial.dll @@ -110,14 +149,19 @@ + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
- - + + - + - + \ No newline at end of file diff --git a/ProcessService/Properties/AssemblyInfo.cs b/ProcessService/Properties/AssemblyInfo.cs index e0744f2..b4fea88 100644 --- a/ProcessService/Properties/AssemblyInfo.cs +++ b/ProcessService/Properties/AssemblyInfo.cs @@ -51,5 +51,5 @@ using System.Runtime.InteropServices; // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.1.0.0")] -[assembly: AssemblyFileVersion("0.1.0.0")] +[assembly: AssemblyVersion("0.1.2.0")] +[assembly: AssemblyFileVersion("0.1.2.0")] diff --git a/ProcessService/SpillMachine.cs b/ProcessService/SpillMachine.cs index 93cb783..9a30700 100644 --- a/ProcessService/SpillMachine.cs +++ b/ProcessService/SpillMachine.cs @@ -17,6 +17,7 @@ See the Apache Version 2.0 License for specific language governing permissions a limitations under the License. */ +#if false using System; using System.Collections.Generic; using System.Linq; @@ -279,3 +280,4 @@ namespace Microsoft.Research.Dryad.ProcessService } } } +#endif \ No newline at end of file diff --git a/ProcessService/packages.config b/ProcessService/packages.config index 6cc3081..bd29d67 100644 --- a/ProcessService/packages.config +++ b/ProcessService/packages.config @@ -1,13 +1,21 @@ - + + + + - + + + + + + - + \ No newline at end of file diff --git a/README.txt b/README.txt index e3f0c06..44634a0 100644 --- a/README.txt +++ b/README.txt @@ -8,41 +8,27 @@ the LINQ programming model for distributed data processing and leverages Dryad for reliable execution. Dryad and DryadLINQ on YARN are still under active development. -As a result, you should expect some fragility. 
+ +If you just want to run a sample DryadLINQ program, the simplest way to get started +is to follow the instructions at +http://microsoftresearchsvc.github.io/Dryad/?topic=html/e992fd94-c956-481d-82e6-dbdf45daa722.htm Requirements -A version of YARN built for Windows - The BUILDING.txt file in the Hadoop YARN repository contains - instructions on building YARN for Windows. Visual Studio 2012 -Java Development Kit 1.6 -A Windows YARN cluster composed of x64 machines +An Azure HDInsight 3.0 cluster or a Windows YARN cluster composed of x64 machines Building Dryad 1) Clone the Dryad git repository. -2) Ensure that YARN_HOME environment variable is set. -3) Set the DRYAD_HOME environment variable to binary path - (bin\Debug or bin\Release) under the directory Dryad was cloned to. -4) Use Visual Studio to open The Dryad solution file (Dryad.sln) located - in the root of the repository and build the solution. -5) Run Build.bat in the Java directory at the top-level of the repository. - The CLASSPATH will need to be set to the output of the 'yarn classpath' - command. +2) Set the DRYAD_HOME environment variable to the binary path + (bin\Debug or bin\Release) under the directory Dryad was cloned to. +3) Use Visual Studio to open the Dryad solution file (Dryad.sln) located + in the root of the repository and build the solution. The dependencies + will be fetched from NuGet. -Cluster setup -1) Setup your YARN cluster as you normally would. -2) Copy the contents of the DRYAD_HOME directory to the location set by - DRYAD_HOME on each compute node in the cluster. Notes -The YARN interfaces used are current as of commit dfb83b8 in trunk. - -If you are running debug builds of the Dryad, also copy the files msvcp110d.dll -and msvcr110d.dll to the DRYAD_HOME directory on each compute node. The -article at http://msdn.microsoft.com/en-us/library/vstudio/aa985618.aspx -describes how to do this. - -The HDFS implementation in Dryad currently only supports text files.
+The HDFS implementation in Dryad currently only supports text files, although Azure Blobs can +be used to store any serialized data type. diff --git a/XmlDoc/Content/GettingStarted.aml b/XmlDoc/Content/GettingStarted.aml new file mode 100644 index 0000000..8fad743 --- /dev/null +++ b/XmlDoc/Content/GettingStarted.aml @@ -0,0 +1,85 @@ + + + + + + + + These topics are designed to help you get started using DryadLINQ. + + + +
+ Getting Started Topics + + + + + Quick Start guide to downloading and trying DryadLINQ + + + + + + Guide to setting up an HDInsight 3.0 cluster to use with DryadLINQ + + + + + + Guide to building the DryadLINQ job browser + + + + + + +
+ + + + +
+
diff --git a/XmlDoc/Content/GettingStarted/Building the Job Browser.aml b/XmlDoc/Content/GettingStarted/Building the Job Browser.aml new file mode 100644 index 0000000..f7d4ccf --- /dev/null +++ b/XmlDoc/Content/GettingStarted/Building the Job Browser.aml @@ -0,0 +1,130 @@ + + + + + + + + For now the only way to get the Job Browser is to build it from source. Follow these instructions to + build the DryadLINQ job browser: + + + + + Getting the code + + + + If you don't have any git tools installed, either install the + Visual Studio Tools for Git + http://visualstudiogallery.msdn.microsoft.com/abafc7d6-dcaa-40f4-8a5e-d6724bdb980c + _blank + extension to Visual Studio 2012 (VS 2013 has them built in) or + Git for Windows + http://msysgit.github.io/ + _blank + command line tools. + + + + + Clone the Dryad sources from + https://github.com/MicrosoftResearchSVC/Dryad.git + https://github.com/MicrosoftResearchSVC/Dryad.git + _blank + . + + + + + Open <mydryaddirectory>\JobBrowser\JobBrowser.sln in Visual Studio 2012 or 2013 and + build the Release target. + + + + + + + + Installing for later use + + + + Copy <mydryaddirectory>\JobBrowser\JobBrowser\bin\x64\Release to a folder of your + choosing, e.g. c:\DryadLinqBrowser and add it to your PATH if desired. + + + + + Run DryadLinqBrowser.exe from that directory. The first time it runs it will read your + subscriptions (if you have configured Powershell as described in + Setting up an HDInsight 3.0 cluster) and detect all the + attached HDInsight clusters. If you want it to rescan later you can click on <scan> + in the Cluster menu, or add and remove clusters manually from that menu. + + + + + Select a cluster from the Cluster menu. 
+ + + + + + + + + + + + + + + + + DryadLINQ Job Browser documentation + https://github.com/MicrosoftResearchSVC/Dryad/wiki/Using%20the%20DryadLINQ%20job%20browser + + + + + diff --git a/XmlDoc/Content/GettingStarted/QuickStart.aml b/XmlDoc/Content/GettingStarted/QuickStart.aml new file mode 100644 index 0000000..e0869cf --- /dev/null +++ b/XmlDoc/Content/GettingStarted/QuickStart.aml @@ -0,0 +1,154 @@ + + + + + + Instructions for quickly getting started with DryadLINQ: + + + + Get the core DryadLINQ libraries and example code + + + + Create a new Visual Studio Visual C# Console Application project. You need Visual Studio 2012 or later. + + + + + Go to the Configuration manager menu, select your project, and add a new project platform x64, + making sure to check the Create new solution platforms box. + + + + + + Right-click on the project and select "Manage NuGet Packages...". + + + + + Change the "Stable Only" dropdown to "Include Prerelease". + + + + + Choose 'Online/nuget.org' in the left pane. + + + + + Enter "Dryad" into the search box, then select and install "Sample DryadLINQ Programs". + + + + + The sample includes a Word Count example in a separate WordCount.cs file. Add + WordCount.WordCountExample(); to the Main method in Program.cs. + + + + You should now be able to build the project. + + + + Running the sample on your local computer + + + + The sample is initially configured to run locally. All you have to do is build and run the program with + no arguments, and it will perform a word count on a single line of text embedded in the lines + variable, and output the results to the console. The output will look something like: + + + + + + + + + + Running the sample on an Azure cluster + + + + If you don't already have one, you must first create an HDInsight cluster. + + + + + Comment the #define local statement at the top of WordCount.cs + to modify the sample to run on Azure. 
+ + + + + Specify your cluster name, default storage account and default container in the + clusterName, accountName and containerName + variable constructors. + + + + + Now rebuild and run, and the program will execute on your cluster, writing output to Azure blob storage. + The first time you run a DryadLINQ program on a cluster you will see lots of output log lines telling you that + binaries are being uploaded to 'staging' blobs so that they will be available for use by the running program. + Subsequent runs won't need nearly as many files to be uploaded. Once all the staging files have been uploaded + there will be a delay of a minute or so while HDInsight launches the DryadLINQ job. For a short test like this + one above almost all the time is taken by job launch, but for serious computation this startup time won't make + a big difference to the overall running time. Eventually the submission program will return, and if you go look + at the Azure storage container you specified above, there will be a blob called + wc-out.txt/part-00000000-0001-0000 which contains the counted output. + For this simple program there is only a single output part, though in general there may be many. + + + + + + + Setting up an HDInsight 3.0 cluster + + + + diff --git a/XmlDoc/Content/GettingStarted/SettingUpCluster.aml b/XmlDoc/Content/GettingStarted/SettingUpCluster.aml new file mode 100644 index 0000000..9cede55 --- /dev/null +++ b/XmlDoc/Content/GettingStarted/SettingUpCluster.aml @@ -0,0 +1,101 @@ + + + + + + Instructions for setting up an HDInsight 3.0 cluster to use with DryadLINQ. 
+ + + + + Getting and configuring a cluster + + + + If you don't already have one, set up an Azure subscription from the + Azure welcome website + http://azure.microsoft.com/en-us/ + _blank + + + + + + In order for the DryadLINQ submission process to work smoothly, you must install and configure the + + Azure powershell tools + http://azure.microsoft.com/en-us/documentation/articles/install-configure-powershell/ + _blank + . In the "How to: Connect to your subscription" item, follow the instructions under "Use the + certificate method." Ensure that your environment is correctly setup by running Get-AzureSubscription + and verifying that your subscription has information identifying the management certificate. + + + + + If you don't already have one, you need to + create an HDInsight 3.0 cluster + http://azure.microsoft.com/en-us/documentation/articles/hdinsight-get-started-30/ + _blank + . (Note that the page title talks about "Hadoop 2.2"---this is correct, since HDInsight 3.0 + uses Hadoop 2.2.) + + + + + Make a note of the default storage account and container for your cluster if you want to use the sample program, + since it reads from data stored in that container. + + + + + + + + + + + + + + + + + + + diff --git a/XmlDoc/Content/Resources.aml b/XmlDoc/Content/Resources.aml new file mode 100644 index 0000000..d06df02 --- /dev/null +++ b/XmlDoc/Content/Resources.aml @@ -0,0 +1,68 @@ + + + + + + + + These topics give more information about DryadLinq. + + + +
+ Resource Topics + + + + + What happens when a DryadLINQ job runs on an HDInsight cluster + + + + + +
+ + + + +
+
diff --git a/XmlDoc/Content/Resources/Running a job on HDInsight.aml b/XmlDoc/Content/Resources/Running a job on HDInsight.aml new file mode 100644 index 0000000..4f422be --- /dev/null +++ b/XmlDoc/Content/Resources/Running a job on HDInsight.aml @@ -0,0 +1,174 @@ + + + + + + + + + + + The process for running a DryadLINQ application on HDInsight 3.0 is a bit complicated. This is because + HDInsight does not expose all of the "raw" Hadoop 2.2 protocols to clients outside the cluster. In particular, + the only way to launch a job on a cluster is using the + Templeton + http://people.apache.org/~thejas/templeton_doc_latest/index.html + _blank + REST APIs, as nicely wrapped up in the + Microsoft .NET SDK for Hadoop + Optional alternate text + http://hadoopsdk.codeplex.com/ + _blank + . Unfortunately, right now Templeton does not support native YARN applications like DryadLINQ, and so + the only jobs that may be launched from outside the cluster are Hadoop 1 jobs (MapReduce, HIVE, Pig, and so on). + + + + + + + + What happens when your client program runs a job + + + + The client DryadLINQ program determines all of the resources that will be needed in the job. It + checks to see if they are already present on the cluster (using a hash of the binary) and uploads any that + are not present. They are uploaded to the default cluster storage account, so that Hadoop 2.2 services like + YARN will be able to read them using wasb. (See + Using Azure Blob storage with HDInsight + Optional alternate text + http://azure.microsoft.com/en-us/documentation/articles/hdinsight-use-blob-storage/ + _blank + for an explanation of how wasb/hdfs interacts with Azure blob storage.) + + + + + The client serializes a description of the DryadLINQ YARN application into an XML file. This file contains + a list of the resources that the DryadLINQ Application Master needs in order to run, and a command line for the + application master. 
(See + YARN concepts + http://hortonworks.com/blog/apache-hadoop-yarn-concepts-and-applications/ + _blank + for an explanation of application masters.) This XML file is uploaded to the cluster's + default container as user/<yourUserName>/staging/<jobGuid>.xml.<hash>. + + + + + The client calls the .NET Hadoop SDK to run a Hadoop Streaming job using the above XML file as input. + + + + + The .NET SDK calls the Templeton REST API on your cluster. + + + + + The Templeton REST server launches a MapReduce job called TempletonControllerJob on + your cluster. + + + + + The controller job launches a second MapReduce job called streamjob<someNumber>.jar + on your cluster. + + + + + The streaming job reads the XML serialized above, and launches the DryadLINQ YARN application master, which + then actually runs your program. The title of the DryadLINQ application is DryadLINQ.App by + default, but you can set it to something more friendly using the JobFriendlyName property + of the DryadLinqContext. + + + + + The streaming job writes the YARN application Id for the DryadLINQ application back to the cluster's default + container as user/<yourUserName>/staging/<jobGuid>/part.00000. + + + + + The DryadLINQ application writes heartbeat, logging and status information into a container called + dryad-jobs/<yarn-application-id> in the cluster's default storage account. + + + + + The client code reads the application id from user/<yourUserName>/staging/<jobGuid>/part.00000 + and then monitors dryad-jobs/<yarn-application-id> to get updates on the progress of the job. + This is also where the job browser gets its information about the job. + + + + + + If you + Enable Remote Desktop on your HDInsight cluster + http://azure.microsoft.com/en-us/documentation/articles/hdinsight-administer-use-management-portal/ + _blank + , and click on the Hadoop YARN Status shortcut link on the desktop, you can see all these + jobs running. 
+ Unfortunately, because of the current configuration of HDInsight clusters, all DryadLINQ logs are deleted immediately + when the application exits, and you will get a "Failed redirect for container" error if you try to navigate to the logs of + a completed application. We have tried to surface errors from user application code so that they are visible in the + DryadLINQ Job Browser, avoiding the need to consult + the logs. + + + + + + + + + + + diff --git a/XmlDoc/Content/VersionHistory/VersionHistory.aml b/XmlDoc/Content/VersionHistory/VersionHistory.aml new file mode 100644 index 0000000..c0bca26 --- /dev/null +++ b/XmlDoc/Content/VersionHistory/VersionHistory.aml @@ -0,0 +1,29 @@ + + + + + The topics in this section describe the various changes made to the DryadLINQ project over the +life of the project. + + +
+ Version History + + Select a version below to see a description of its changes. + + + + + + + + + +
+ + + + + +
+
diff --git a/XmlDoc/Content/VersionHistory/v0.1.2.aml b/XmlDoc/Content/VersionHistory/v0.1.2.aml new file mode 100644 index 0000000..9caf4f6 --- /dev/null +++ b/XmlDoc/Content/VersionHistory/v0.1.2.aml @@ -0,0 +1,28 @@ + + + + + Version v0.1.2 was released on April 17th, 2014. + + + +
+ Changes in This Release + + + + + Initial documented release + + + + + +
+ + + + + +
+
diff --git a/XmlDoc/Content/Welcome.aml b/XmlDoc/Content/Welcome.aml new file mode 100644 index 0000000..c6fe694 --- /dev/null +++ b/XmlDoc/Content/Welcome.aml @@ -0,0 +1,62 @@ + + + + + Dryad is a distributed execution engine that provides fault-tolerant execution of computations on a + cluster of computers. The goal of DryadLINQ is to make distributed computing on large compute clusters simple. + DryadLINQ combines two important pieces of Microsoft technology: the Dryad distributed execution engine and the + .NET Language Integrated Query( + LINQ + http://msdn.microsoft.com/en-us/library/vstudio/bb397926.aspx + _blank + ). The DryadLINQ job browser is a graphical user interface which provides a + unified view of a large part of the distributed state of a DryadLINQ job. + + Dryad, DryadLINQ, and the JobBrowser have been released as open source under the Apache 2.0 license. The + source is hosted at the + Dryad GitHub repository + https://github.com/MicrosoftResearchSVC/Dryad + _blank + . There are also + Dryad NuGet packages + http://www.nuget.org/packages?q=dryadlinq + _blank + available built from the source, to make + it easy to develop applications using these tools. + + + +
+ Getting Started + + There are Getting Started instructions here. + +
+ +
+ Using the job browser + + See details on Building the job browser. More documentation + is listed in the + DryadLINQ job browser manual + https://github.com/MicrosoftResearchSVC/Dryad/wiki/Using%20the%20DryadLINQ%20job%20browser + _blank + . + +
+ +
+ Resources + + Reference documentation for the N:Microsoft.Research.DryadLinq namespace. + + What actually happens when a DryadLINQ job runs on HDInsight. + + +
+ + + + +
+
diff --git a/XmlDoc/ContentLayout.content b/XmlDoc/ContentLayout.content new file mode 100644 index 0000000..d2b3650 --- /dev/null +++ b/XmlDoc/ContentLayout.content @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/XmlDoc/Media/Dryad on Azure Architecture.png b/XmlDoc/Media/Dryad on Azure Architecture.png new file mode 100644 index 0000000..410b709 Binary files /dev/null and b/XmlDoc/Media/Dryad on Azure Architecture.png differ diff --git a/XmlDoc/XmlDoc.shfbproj b/XmlDoc/XmlDoc.shfbproj new file mode 100644 index 0000000..3dd257b --- /dev/null +++ b/XmlDoc/XmlDoc.shfbproj @@ -0,0 +1,108 @@ + + + + + Debug + AnyCPU + 2.0 + 5d2623e0-22a0-4c25-b060-561679aa3260 + 1.9.9.0 + + XmlDoc + XmlDoc + XmlDoc + + .NET Framework 4.5 + .\Help\ + XmlDoc + en-US + + + + + + + + + + + + + + OnlyWarningsAndErrors + Website + False + True + False + False + True + 2 + False + C#, Visual Basic, Managed C++ + Blank + False + VS2010 + False + Guid + DryadLINQ documentation + AboveNamespaces + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\.NETFramework\v4.5\System.Core.dll + True + + + + + Dryad on Azure Architecture + Dryad on Azure Architecture + + + + + \ No newline at end of file diff --git a/XmlDoc/XmlDoc.sln b/XmlDoc/XmlDoc.sln new file mode 100644 index 0000000..24d24e5 --- /dev/null +++ b/XmlDoc/XmlDoc.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2012 +Project("{7CF6DF6D-3B04-46F8-A40B-537D21BCA0B4}") = "XmlDoc", "XmlDoc.shfbproj", "{5D2623E0-22A0-4C25-B060-561679AA3260}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {5D2623E0-22A0-4C25-B060-561679AA3260}.Debug|Any 
CPU.ActiveCfg = Debug|Any CPU + {5D2623E0-22A0-4C25-B060-561679AA3260}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5D2623E0-22A0-4C25-B060-561679AA3260}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5D2623E0-22A0-4C25-B060-561679AA3260}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/samples/Microsoft.Research.DryadLinq.Samples.nuspec b/samples/Microsoft.Research.DryadLinq.Samples.nuspec index 82e57cf..236f5f4 100644 --- a/samples/Microsoft.Research.DryadLinq.Samples.nuspec +++ b/samples/Microsoft.Research.DryadLinq.Samples.nuspec @@ -2,11 +2,11 @@ Microsoft.Research.DryadLinq.Samples - 0.1.0-beta + 0.1.2-beta Sample DryadLINQ Programs - msrsvc - msrsvc - http://research.microsoft.com/en-us/projects/dryad/ + bigdatadev@microsoft.com + bigdatadev@microsoft.com,Microsoft + http://www.apache.org/licenses/LICENSE-2.0.html http://research.microsoft.com/en-us/projects/dryad/ true Sample DryadLINQ programs. 
@@ -14,7 +14,7 @@ en-US - + diff --git a/samples/WordCount.cs.pp b/samples/WordCount.cs.pp index 9329058..4a59329 100644 --- a/samples/WordCount.cs.pp +++ b/samples/WordCount.cs.pp @@ -1,33 +1,59 @@ -using System; +// comment the following line to run on Azure +#define local + +using System; using System.Collections.Generic; using System.Linq; using Microsoft.Research.DryadLinq; using Microsoft.Research.Peloponnese.Storage; -namespace $rootnamespace$ +namespace $rootnamespace$ { public class WordCount { public static void WordCountExample() { - string accountName = "Replace with your account name"; - string storageKey = "Replace with your storage key"; - string containerName = "Replace with the default container name for your HDInsight cluster"; - - // If you have multiple HDInsight clusters, modify the DryadLinqContext to include the clusterName - var config = new DryadLinqContext(accountName, storageKey, containerName); - - // the LocalExecution flag determines if the computation is run locally or on a cluster - config.LocalExecution = false; - - var input = config.FromStore(AzureUtils.ToAzureUri(accountName, storageKey, containerName, +#if local + // This overload runs the computation on your local computer using a single worker + var config = new DryadLinqContext(1); + + var lines = new LineRecord[] { new LineRecord("This is a dummy line for a short job") }; + // You can create inputs from any IEnumerable source using this method + var input = config.FromEnumerable(lines); +#else + string clusterName = "Replace with your HDInsight 3.0 cluster name"; + // to use the davinci.txt example input below, select your cluster's default + // storage account and container, which automatically includes the sample text + string accountName = "Replace with a storage account name"; + string containerName = "Replace with a storage container name"; + + // This overload creates an Azure-based computation + var config = new DryadLinqContext(clusterName); + config.JobFriendlyName 
= "DryadLINQ Sample Wordcount"; + + // plain text files should be read as type LineRecord + var input = config.FromStore(AzureUtils.ToAzureUri(accountName, containerName, "example/data/gutenberg/davinci.txt")); +#endif + var words = input.SelectMany(x => x.Line.Split(' ')); var groups = words.GroupBy(x => x); var counts = groups.Select(x => new KeyValuePair(x.Key, x.Count())); var toOutput = counts.Select(x => new LineRecord(String.Format("{0}: {1}", x.Key, x.Value))); - var info = toOutput.ToStore(AzureUtils.ToAzureUri(accountName, storageKey, containerName, - "wc-out.txt")).SubmitAndWait(); + +#if local + // any collection computed by the query can be materialized back at the client, + // not just the 'output' collection. For large collections this is expensive! + foreach (LineRecord line in toOutput) + { + Console.WriteLine(line.Line); + } +#else + // the 'true' parameter to ToStore means the output will be over-written if you run + // the job more than once + var info = toOutput.ToStore(AzureUtils.ToAzureUri(accountName, containerName, + "wc-out.txt"), true).SubmitAndWait(); +#endif } } }