305 lines
11 KiB
C#
305 lines
11 KiB
C#
/*
|
|
Copyright (c) Microsoft Corporation
|
|
|
|
All rights reserved.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
|
|
compliance with the License. You may obtain a copy of the License
|
|
at http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
|
|
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
|
|
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.
|
|
|
|
|
|
See the Apache Version 2.0 License for specific language governing permissions and
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Diagnostics;
|
|
using System.Linq.Expressions;
|
|
using System.Reflection;
|
|
using System.IO;
|
|
using Microsoft.Research.DryadLinq.Internal;
|
|
|
|
namespace Microsoft.Research.DryadLinq
|
|
{
|
|
|
|
|
|
/// <summary>
|
|
/// Represents the context necessary to prepare and execute a HPC LINQ Query,
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// <para>
|
|
/// HpcLinqContext is the main entry point for the HPC LINQ framework. The context
|
|
/// that is maintained by a HpcLinqContext instance includes configuration information, a connection to a DSC Service
|
|
/// that can be used during execution, and a connection to a HPC Server.
|
|
/// </para>
|
|
/// <para>
|
|
/// A HpcLinqContext may be reused by multiple queries and query executions.
|
|
/// </para>
|
|
/// <para>
|
|
/// A HpcLinqContext may hold open connections to DSC and a HPC Server. To release these connections, call
|
|
/// HpcLinqContext.Dispose()
|
|
/// </para>
|
|
/// </remarks>
|
|
public sealed class HpcLinqContext : IDisposable
|
|
{
|
|
private HpcLinqConfiguration _configuration;
|
|
private HpcQueryRuntime _runtime;
|
|
private DscService _dscService;
|
|
private string _hdfsServiceNode;
|
|
private Version _clientVersion;
|
|
private Version _serverVersion;
|
|
|
|
/// <summary>
|
|
/// Gets the configuration associated with this HpcLinqContext.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// The Configuration object returns will be read-only.
|
|
/// </remarks>
|
|
public HpcLinqConfiguration Configuration
|
|
{
|
|
get {
|
|
ThrowIfDisposed();
|
|
return _configuration;
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Gets the DscService associated with this HpcLinqContext.
|
|
/// </summary>
|
|
public DscService DscService
|
|
{
|
|
get
|
|
{
|
|
ThrowIfDisposed();
|
|
return _dscService;
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Gets the HdfsService associated with this HpcLinqContext.
|
|
/// </summary>
|
|
public string HdfsService
|
|
{
|
|
get
|
|
{
|
|
ThrowIfDisposed();
|
|
return _hdfsServiceNode;
|
|
}
|
|
}
|
|
|
|
// internal: the runtime associated with this HpcLinqContext.
|
|
internal HpcQueryRuntime Runtime {
|
|
get
|
|
{
|
|
ThrowIfDisposed();
|
|
return _runtime;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/// <summary>
|
|
/// Version of the HpcLinq client components
|
|
/// </summary>
|
|
public Version ClientVersion {
|
|
get {
|
|
ThrowIfDisposed();
|
|
if (_clientVersion == null)
|
|
{
|
|
LoadClientVersion(); // thread-safe
|
|
}
|
|
return _clientVersion;
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Version of the HpcLinq server components
|
|
/// </summary>
|
|
public Version ServerVersion {
|
|
get
|
|
{
|
|
ThrowIfDisposed();
|
|
if (_serverVersion == null)
|
|
{
|
|
LoadServerVersion(); // thread-safe
|
|
}
|
|
return _serverVersion;
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Initializes a new instance of the HpcLinqConfiguration class.
|
|
/// </summary>
|
|
/// <param name="configuration">Configuration information.</param>
|
|
/// <remarks>
|
|
/// Connections will be opened to DSC and HPC Server using configuration.HeadNode.
|
|
/// The connections will be opened regardless of whether DSC is used and/or whether
|
|
/// configuration.LocalDebug is true
|
|
/// </remarks>
|
|
public HpcLinqContext(HpcLinqConfiguration configuration)
|
|
{
|
|
// Verify that the head node is set
|
|
if (configuration.HeadNode == null)
|
|
{
|
|
throw new DryadLinqException(HpcLinqErrorCode.ClusterNameMustBeSpecified,
|
|
SR.ClusterNameMustBeSpecified);
|
|
}
|
|
|
|
_configuration = configuration.MakeImmutableCopy();
|
|
_runtime = new HpcQueryRuntime(_configuration.HeadNode);
|
|
_dscService = new DscService(_configuration.HeadNode);
|
|
_hdfsServiceNode = _configuration.HdfsNameNode;
|
|
}
|
|
|
|
private void LoadClientVersion()
|
|
{
|
|
try
|
|
{
|
|
Assembly asm = Assembly.GetExecutingAssembly();
|
|
_clientVersion = new Version(FileVersionInfo.GetVersionInfo(asm.Location).FileVersion);
|
|
}
|
|
catch (Exception ex)
|
|
{
|
|
throw new DryadLinqException(HpcLinqErrorCode.CouldNotGetClientVersion,
|
|
SR.CouldNotGetClientVersion, ex);
|
|
}
|
|
}
|
|
|
|
private void LoadServerVersion()
|
|
{
|
|
try
|
|
{
|
|
IServerVersion version = this.GetIScheduler().GetServerVersion();
|
|
_serverVersion = new Version(version.Major, version.Minor, version.Build, version.Revision);
|
|
}
|
|
catch (Exception ex)
|
|
{
|
|
throw new DryadLinqException(HpcLinqErrorCode.CouldNotGetServerVersion,
|
|
SR.CouldNotGetServerVersion, ex);
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Open a DSC fileset as a LINQ-to-HPC IQueryable{T}.
|
|
/// </summary>
|
|
/// <typeparam name="T">The type of the records in the table.</typeparam>
|
|
/// <param name="dscFileSetName">The name of the DSC fileset. </param>
|
|
/// <returns>An IQueryable{T} representing the data and associated with the HPC LINQ query provider.</returns>
|
|
public IQueryable<T> FromDsc<T>(string fileSetName)
|
|
{
|
|
ThrowIfDisposed();
|
|
|
|
string fullPath = DataPath.MakeDscStreamUri(_dscService, fileSetName);
|
|
|
|
try {
|
|
DscFileSet fs = _dscService.GetFileSet(fileSetName);
|
|
if (!fs.IsSealed())
|
|
{
|
|
throw new DryadLinqException(HpcLinqErrorCode.FileSetMustBeSealed,
|
|
SR.FileSetMustBeSealed);
|
|
}
|
|
|
|
int fileCount = fs.GetFiles().Count();
|
|
if (fileCount < 1)
|
|
{
|
|
throw new DryadLinqException(HpcLinqErrorCode.FileSetMustHaveAtLeastOneFile,
|
|
SR.FileSetMustHaveAtLeastOneFile);
|
|
}
|
|
|
|
}
|
|
catch (DscException dscEx){
|
|
throw new DryadLinqException(HpcLinqErrorCode.FileSetCouldNotBeOpened,
|
|
SR.FileSetCouldNotBeOpened, dscEx);
|
|
}
|
|
|
|
DryadLinqQuery<T> q = DataProvider.GetPartitionedTable<T>(this, fullPath);
|
|
q.CheckAndInitialize(); // force the data-info checks.
|
|
return q;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Open a HDFS fileset as an IQueryable{T}.
|
|
/// </summary>
|
|
/// <typeparam name="T">The type of the records in the table.</typeparam>
|
|
/// <param name="dscFileSetName">The name of the HDFS fileset. </param>
|
|
/// <returns>An IQueryable{T} representing the data and associated with the HPC LINQ query provider.</returns>
|
|
public IQueryable<T> FromHdfs<T>(string fileSetName)
|
|
{
|
|
ThrowIfDisposed();
|
|
|
|
string fullPath = DataPath.MakeHdfsStreamUri(_hdfsServiceNode, fileSetName);
|
|
return DataProvider.GetPartitionedTable<T>(this, fullPath);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Converts an IEnumerable{T} to a LINQ-to-HPC IQueryable{T}.
|
|
/// </summary>
|
|
/// <typeparam name="T">The type of the records in the table.</typeparam>
|
|
/// <param name="data">The source data.</param>
|
|
/// <returns>An IQueryable{T} representing the data and associated with the HPC LINQ query provider.</returns>
|
|
/// <remarks>
|
|
/// The source data will be serialized to a DSC fileset using the LINQ-to-HPC serialization approach.
|
|
/// The resulting fileset will have an auto-generated name and a temporary lease.
|
|
/// </remarks>
|
|
public IQueryable<T> FromEnumerable<T>(IEnumerable<T> data)
|
|
{
|
|
string fileSetName = DataPath.MakeUniqueTemporaryDscFileSetName();
|
|
DscCompressionScheme compressionScheme = Configuration.IntermediateDataCompressionScheme;
|
|
DryadLinqMetaData metadata = DryadLinqMetaData.ForLocalDebug(this, typeof(T), fileSetName, compressionScheme);
|
|
return DataProvider.IngressTemporaryDataDirectlyToDsc(this, data, fileSetName, metadata, compressionScheme);
|
|
}
|
|
|
|
|
|
|
|
/// <summary>
|
|
/// Releases all resources used by the HpcLinqContext.
|
|
/// </summary>
|
|
public void Dispose()
|
|
{
|
|
_configuration = null;
|
|
if (_runtime != null)
|
|
{
|
|
_runtime.Dispose();
|
|
_runtime = null;
|
|
}
|
|
if (_dscService != null)
|
|
{
|
|
_dscService.Close();
|
|
_dscService = null;
|
|
}
|
|
}
|
|
|
|
internal static HpcLinqContext GetContext(DryadLinqProviderBase provider)
|
|
{
|
|
HpcLinqContext context = provider.Context;
|
|
Debug.Assert(context != null, "A context should always be associated with a HpcLinqQuery<T>");
|
|
context.ThrowIfDisposed();
|
|
return context;
|
|
}
|
|
|
|
// Return IScheduler reference for internal use
|
|
internal IScheduler GetIScheduler()
|
|
{
|
|
return _runtime.GetIScheduler();
|
|
}
|
|
|
|
internal void ThrowIfDisposed()
|
|
{
|
|
if (_configuration == null)
|
|
{
|
|
throw new DryadLinqException(HpcLinqErrorCode.ContextDisposed,
|
|
SR.ContextDisposed);
|
|
}
|
|
}
|
|
}
|
|
}
|