/*
Copyright (c) Microsoft Corporation

All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License.  You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0


THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE,
FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.


See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.

*/

//
// © Microsoft Corporation.  All rights reserved.
//
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Data.SqlTypes;
using System.Diagnostics;
using System.Runtime.Serialization.Formatters.Binary;
using System.Runtime.Serialization;
using Microsoft.Research.DryadLinq.Internal;
using Microsoft.Research.DryadLinq;

namespace Microsoft.Research.DryadLinq
{
    /// <summary>
    /// HpcBinaryWriter is the main interface for user provided custom serializers
    /// or DL-internal autoserialization codepaths to write primitive types to a partition file.
    /// </summary>
    /// <remarks>
    /// Values are serialized into a block obtained from the native stream. The block is handed
    /// to the native stream when it fills up, on <c>Flush</c>, and on <c>Close</c>.
    /// Multi-byte integers are written least-significant byte first, except for the
    /// "compact" encoding (see <c>WriteCompact</c>).
    /// </remarks>
    public unsafe sealed class HpcBinaryWriter
    {
        private const Int32 DefaultBlockSize = 256 * 1024;

        // Shift applied to DateTime.Kind so that it is stored in the top two bits of the ticks value.
        private const Int32 KindShift = 62;

        private NativeBlockStream m_nativeStream;
        private Encoding m_encoding;
        private Int32 m_nextBlockSize;        // Size requested the next time the block has to grow.
        private Int32 m_bufferSizeHint;       // The buffSize value passed to the constructor.
        private DataBlockInfo m_curDataBlockInfo;
        private byte* m_curDataBlock;         // The current write buffer. This is allocated from the native stream,
                                              // individual WriteXXX methods serialize primitives into this buffer,
                                              // and it gets written out when it's full
        private Int32 m_curBlockSize;         // Size of the current write buffer.
        private Int32 m_curRecordStart;       // Offset just past the last record completed via CompleteWriteRecord.
        private Int32 m_curRecordEnd;         // Offset of the next byte to be written.
        private Int32 m_charMaxByteCount;     // m_encoding.GetMaxByteCount(1), cached for Write(char).
        private bool m_isClosed;
        private Int64 m_numBytesWritten;      // Bytes already handed to the native stream.
        private bool m_calcFP;
        private BinaryFormatter m_bfm;        // NOTE(review): assigned in the constructor but not read anywhere in this class.

        internal HpcBinaryWriter(NativeBlockStream stream)
            : this(stream, Encoding.UTF8)
        {
        }

        internal HpcBinaryWriter(NativeBlockStream stream, Encoding encoding)
            : this(stream, encoding, DefaultBlockSize)
        {
        }

        /// <summary>
        /// Creates a writer on top of <paramref name="stream"/>. No data block is allocated here;
        /// the first block is obtained by the first write that needs space.
        /// </summary>
        /// <param name="stream">Native stream that supplies and consumes data blocks.</param>
        /// <param name="encoding">Encoding used for chars and strings.</param>
        /// <param name="buffSize">Buffer size hint; half of it is used as the first block size
        /// when that exceeds the default block size.</param>
        internal HpcBinaryWriter(NativeBlockStream stream, Encoding encoding, Int32 buffSize)
        {
            this.m_nativeStream = stream;
            this.m_encoding = encoding;
            this.m_nextBlockSize = Math.Max(DefaultBlockSize, buffSize / 2);
            this.m_bufferSizeHint = buffSize;
            this.m_curDataBlockInfo.dataBlock = null;
            this.m_curDataBlockInfo.blockSize = 0;
            this.m_curDataBlockInfo.itemHandle = IntPtr.Zero;
            this.m_curDataBlock = this.m_curDataBlockInfo.dataBlock;
            this.m_curBlockSize = this.m_curDataBlockInfo.blockSize;
            this.m_curRecordStart = 0;
            this.m_curRecordEnd = 0;
            this.m_charMaxByteCount = this.m_encoding.GetMaxByteCount(1);
            this.m_isClosed = false;
            this.m_numBytesWritten = 0;
            this.m_calcFP = false;
            this.m_bfm = new BinaryFormatter();
        }

        internal HpcBinaryWriter(IntPtr vertexInfo, UInt32 portNum, Int32 buffSize)
            : this(new HpcLinqChannel(vertexInfo, portNum, false), Encoding.UTF8, buffSize)
        {
        }

        internal HpcBinaryWriter(IntPtr vertexInfo, UInt32 portNum, Encoding encoding, Int32 buffSize)
            : this(new HpcLinqChannel(vertexInfo, portNum, false), encoding, buffSize)
        {
        }

        ~HpcBinaryWriter()
        {
            // The finalizer also runs for an instance whose constructor threw (for example while
            // the chained constructors were creating the HpcLinqChannel). In that case
            // m_nativeStream is still null and Close() would throw a NullReferenceException
            // on the finalizer thread, so only close a fully constructed writer.
            if (this.m_nativeStream != null)
            {
                this.Close();
            }
        }

        ////////////////////////////////////////////////////////////////////////////////
        //
        // Internal methods
        //

        /// <summary>
        /// The buffer size that was passed to the constructor.
        /// </summary>
        internal Int32 BufferSizeHint
        {
            get { return this.m_bufferSizeHint; }
        }

        /// <summary>
        /// Marks everything written so far as belonging to complete records, so that a later
        /// block flush may hand those bytes to the native stream.
        /// </summary>
        internal void CompleteWriteRecord()
        {
            this.m_curRecordStart = this.m_curRecordEnd;
        }

        /// <summary>
        /// Gets or sets whether fingerprint calculation is enabled for this writer.
        /// </summary>
        internal bool CalcFP
        {
            get { return this.m_calcFP; }
            set
            {
                // NOTE(review): SetCalcFP is invoked for both true and false -- confirm that
                // this is what the native stream expects.
                this.m_nativeStream.SetCalcFP();
                this.m_calcFP = value;
            }
        }

        internal string GetChannelURI()
        {
            return this.m_nativeStream.GetURI();
        }

        internal Int64 GetTotalLength()
        {
            return this.m_nativeStream.GetTotalLength();
        }

        /// <summary>
        /// Returns the fingerprint reported by the native stream.
        /// </summary>
        /// <exception cref="DryadLinqException">Fingerprinting was not enabled through CalcFP.</exception>
        internal UInt64 GetFingerPrint()
        {
            if (!this.m_calcFP)
            {
                throw new DryadLinqException(HpcLinqErrorCode.FingerprintDisabled,
                                             SR.FingerprintDisabled);
            }
            return this.m_nativeStream.GetFingerPrint();
        }

        /// <summary>
        /// Writes out the current data buffer (equivalent of FlushDataBlock), and calls
        /// Flush on the native stream to ensure all the data makes its way to the disk
        /// </summary>
        /// <remarks>
        /// Unlike FlushDataBlock, this writes every byte up to the current write position,
        /// including bytes of a record that has not been completed yet.
        /// </remarks>
        internal void Flush()
        {
            if (this.m_curRecordEnd > 0)
            {
                this.m_nativeStream.WriteDataBlock(this.m_curDataBlockInfo.itemHandle, this.m_curRecordEnd);
                this.m_numBytesWritten += this.m_curRecordEnd;
                this.m_nativeStream.ReleaseDataBlock(this.m_curDataBlockInfo.itemHandle);
                this.m_curDataBlockInfo.itemHandle = IntPtr.Zero;

                // Start over with a fresh block of the same size.
                this.m_curDataBlockInfo = this.m_nativeStream.AllocateDataBlock(this.m_curBlockSize);
                this.m_curDataBlock = this.m_curDataBlockInfo.dataBlock;
                this.m_curBlockSize = this.m_curDataBlockInfo.blockSize;
                this.m_curRecordStart = 0;
                this.m_curRecordEnd = 0;
            }
            this.m_nativeStream.Flush();
        }

        /// <summary>
        /// Internal entry point to flush and close the writer. This is called by the record writer
        /// </summary>
        /// <remarks>
        /// Only the first call flushes and closes the native stream; later calls only suppress finalization.
        /// </remarks>
        internal void Close()
        {
            if (!this.m_isClosed)
            {
                this.m_isClosed = true;
                this.Flush();
                if (this.m_curBlockSize > 0)
                {
                    this.m_nativeStream.ReleaseDataBlock(this.m_curDataBlockInfo.itemHandle);
                }
                this.m_nativeStream.Close();
            }
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Private helper that makes room in the write buffer when the current block is full.
        /// - if the completed records occupy at most 16 bytes of the block, the block is treated as
        ///   too small for a record: a larger block is allocated and the whole content is copied over,
        /// - otherwise the completed records are written out to the native stream and only the
        ///   partial record is copied to the start of a new block of the same size,
        /// - in both cases the old block is released back to the native stream code
        ///   (which owns the lifecycle of write buffers),
        /// - and the internal write buffer pointer and size members are updated.
        /// </summary>
        /// <exception cref="DryadLinqException">The block cannot grow any further.</exception>
        private void FlushDataBlock()
        {
            DataBlockInfo newDataBlockInfo;
            if (this.m_curRecordStart <= 16)
            {
                // The current block is too small for a single record, augment it
                if (this.m_curBlockSize == this.m_nextBlockSize)
                {
                    throw new DryadLinqException(HpcLinqErrorCode.RecordSizeMax2GB,
                                                 SR.RecordSizeMax2GB);
                }
                newDataBlockInfo = this.m_nativeStream.AllocateDataBlock(this.m_nextBlockSize);

                // Double the size for the next growth; clamp when the doubling overflows Int32.
                this.m_nextBlockSize = this.m_nextBlockSize * 2;
                if (this.m_nextBlockSize < 0)
                {
                    this.m_nextBlockSize = 0x7FFFFFF8;
                }

                // Nothing is written out here, so record start/end offsets stay valid in the new block.
                HpcLinqUtil.memcpy(this.m_curDataBlock, newDataBlockInfo.dataBlock, this.m_curRecordEnd);
            }
            else
            {
                // Write all the complete records in the block, put the partial record in the new block
                newDataBlockInfo = this.m_nativeStream.AllocateDataBlock(this.m_curBlockSize);
                HpcLinqUtil.memcpy(this.m_curDataBlock + this.m_curRecordStart,
                                   newDataBlockInfo.dataBlock,
                                   this.m_curRecordEnd - this.m_curRecordStart);
                this.m_nativeStream.WriteDataBlock(this.m_curDataBlockInfo.itemHandle, this.m_curRecordStart);
                this.m_numBytesWritten += this.m_curRecordStart;
                this.m_curRecordEnd -= this.m_curRecordStart;
                this.m_curRecordStart = 0;
            }

            this.m_nativeStream.ReleaseDataBlock(this.m_curDataBlockInfo.itemHandle);
            this.m_curDataBlockInfo.itemHandle = IntPtr.Zero;
            this.m_curDataBlockInfo = newDataBlockInfo;
            this.m_curDataBlock = newDataBlockInfo.dataBlock;
            this.m_curBlockSize = newDataBlockInfo.blockSize;
        }

        /// <summary>
        /// Number of bytes written through this writer: bytes already handed to the native
        /// stream plus bytes pending in the current block.
        /// </summary>
        internal Int64 Length
        {
            get { return this.m_numBytesWritten + this.m_curRecordEnd; }
        }

        ////////////////////////////////////////////////////////////////////////////////
        //
        // Public methods
        //

        /// <summary>
        /// Writes a single byte.
        /// </summary>
        public void Write(byte b)
        {
            if (this.m_curRecordEnd == this.m_curBlockSize)
            {
                this.FlushDataBlock();
            }
            this.m_curDataBlock[this.m_curRecordEnd++] = b;
        }

        /// <summary>
        /// Writes a signed byte as one byte.
        /// </summary>
        public void Write(sbyte b)
        {
            if (this.m_curRecordEnd == this.m_curBlockSize)
            {
                this.FlushDataBlock();
            }
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)b;
        }

        /// <summary>
        /// Writes a boolean as one byte: 1 for true, 0 for false.
        /// </summary>
        public void Write(bool b)
        {
            if (this.m_curRecordEnd == this.m_curBlockSize)
            {
                this.FlushDataBlock();
            }
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(b ? 1 : 0);
        }

        /// <summary>
        /// Writes a single char using the writer's encoding.
        /// </summary>
        public void Write(char ch)
        {
            if (this.m_curBlockSize - this.m_curRecordEnd < this.m_charMaxByteCount)
            {
                this.FlushDataBlock();
            }
            int numBytes = this.m_encoding.GetBytes(&ch, 1, this.m_curDataBlock + this.m_curRecordEnd, this.m_charMaxByteCount);
            this.m_curRecordEnd += numBytes;
        }

        /// <summary>
        /// Writes a 16-bit integer, least-significant byte first.
        /// </summary>
        public void Write(short val)
        {
            if (this.m_curBlockSize - this.m_curRecordEnd < 2)
            {
                this.FlushDataBlock();
            }
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)val;
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 8);
        }

        /// <summary>
        /// Writes an unsigned 16-bit integer, least-significant byte first.
        /// </summary>
        public void Write(ushort val)
        {
            if (this.m_curBlockSize - this.m_curRecordEnd < 2)
            {
                this.FlushDataBlock();
            }
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)val;
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 8);
        }

        /// <summary>
        /// Writes a 32-bit integer, least-significant byte first.
        /// </summary>
        public void Write(int val)
        {
            if (this.m_curBlockSize - this.m_curRecordEnd < 4)
            {
                this.FlushDataBlock();
            }
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)val;
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 8);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 16);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 24);
        }

        /// <summary>
        /// Writes an integer in the compact encoding: one byte when the value is below 0x80,
        /// otherwise four bytes, most-significant byte first, with the top bit of the first byte set.
        /// </summary>
        /// <remarks>
        /// The encoding has no room for a sign: a negative value takes the one-byte path and
        /// only its low byte is stored.
        /// </remarks>
        public void WriteCompact(int val)
        {
            if (this.m_curBlockSize - this.m_curRecordEnd < 4)
            {
                this.FlushDataBlock();
            }
            if (val < 0x80)
            {
                this.m_curDataBlock[this.m_curRecordEnd++] = (byte)val;
            }
            else
            {
                this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 24 | 0x80);
                this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 16);
                this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 8);
                this.m_curDataBlock[this.m_curRecordEnd++] = (byte)val;
            }
        }

        /// <summary>
        /// Number of bytes WriteCompact(int) emits for <paramref name="val"/>: 1 or 4.
        /// </summary>
        internal static int CompactSize(int val)
        {
            return (val < 0x80) ? 1 : 4;
        }

        // Writes a 32-bit integer, least-significant byte first, at offset loc of the current
        // block. Does not check for space and does not move the write position.
        private void Write(int val, int loc)
        {
            this.m_curDataBlock[loc++] = (byte)val;
            this.m_curDataBlock[loc++] = (byte)(val >> 8);
            this.m_curDataBlock[loc++] = (byte)(val >> 16);
            this.m_curDataBlock[loc++] = (byte)(val >> 24);
        }

        // Writes val in the compact encoding at offset loc of the current block, using the
        // caller-chosen width (1 or 4 bytes) rather than the width implied by val itself.
        // Does not check for space and does not move the write position.
        private void WriteCompact(int val, int compactSize, int loc)
        {
            if (compactSize == 1)
            {
                this.m_curDataBlock[loc++] = (byte)val;
            }
            else
            {
                this.m_curDataBlock[loc++] = (byte)(val >> 24 | 0x80);
                this.m_curDataBlock[loc++] = (byte)(val >> 16);
                this.m_curDataBlock[loc++] = (byte)(val >> 8);
                this.m_curDataBlock[loc++] = (byte)val;
            }
        }

        /// <summary>
        /// Writes an unsigned 32-bit integer, least-significant byte first.
        /// </summary>
        public void Write(uint val)
        {
            if (this.m_curBlockSize - this.m_curRecordEnd < 4)
            {
                this.FlushDataBlock();
            }
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)val;
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 8);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 16);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 24);
        }

        /// <summary>
        /// Writes a 64-bit integer, least-significant byte first.
        /// </summary>
        public void Write(long val)
        {
            if (this.m_curBlockSize - this.m_curRecordEnd < 8)
            {
                this.FlushDataBlock();
            }
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)val;
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 8);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 16);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 24);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 32);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 40);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 48);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 56);
        }

        /// <summary>
        /// Writes an unsigned 64-bit integer, least-significant byte first.
        /// </summary>
        public void Write(ulong val)
        {
            if (this.m_curBlockSize - this.m_curRecordEnd < 8)
            {
                this.FlushDataBlock();
            }
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)val;
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 8);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 16);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 24);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 32);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 40);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 48);
            this.m_curDataBlock[this.m_curRecordEnd++] = (byte)(val >> 56);
        }

        /// <summary>
        /// Writes the in-memory bytes of a decimal.
        /// </summary>
        public void Write(decimal val)
        {
            this.WriteRawBytes((byte*)&val, sizeof(decimal));
        }

        /// <summary>
        /// Writes a float by reinterpreting its bits as an unsigned 32-bit integer.
        /// </summary>
        public void Write(float val)
        {
            uint tmpVal = *(uint*)&val;
            this.Write(tmpVal);
        }

        /// <summary>
        /// Writes a double by reinterpreting its bits as an unsigned 64-bit integer.
        /// </summary>
        public void Write(double val)
        {
            ulong tmpVal = *(ulong*)&val;
            this.Write(tmpVal);
        }

        /// <summary>
        /// Writes a DateTime as one unsigned 64-bit value: the ticks combined with the
        /// Kind shifted into the top two bits.
        /// </summary>
        public void Write(DateTime val)
        {
            UInt64 tempVal = (UInt64)val.Ticks | (((UInt64)val.Kind) << KindShift);
            this.Write(tempVal);
        }

        /// <summary>
        /// Writes a SqlDateTime as its DayTicks followed by its TimeTicks.
        /// </summary>
        public void Write(SqlDateTime val)
        {
            this.Write(val.DayTicks);
            this.Write(val.TimeTicks);
        }

        /// <summary>
        /// Writes the in-memory bytes of a Guid.
        /// </summary>
        public void Write(Guid guid)
        {
            WriteRawBytes((byte*)&guid, sizeof(Guid));
        }

        /// <summary>
        /// Writes a string as: the char count (compact), the encoded byte count (compact, using
        /// the width that fits the maximum possible byte count), then the encoded bytes.
        /// </summary>
        /// <param name="val">The string to write; must not be null.</param>
        public void Write(string val)
        {
            Int32 len = val.Length;
            Int32 maxByteCount = this.m_encoding.GetMaxByteCount(len);
            Int32 compactSize = CompactSize(maxByteCount);

            // Reserve room for the payload plus up to 8 bytes for the two compact length prefixes.
            // The sum is computed as a long: for very long strings maxByteCount + 8 would wrap
            // around in Int32 arithmetic and the block would not be grown before the unsafe write.
            while (this.m_curBlockSize - this.m_curRecordEnd < ((long)maxByteCount + 8))
            {
                this.FlushDataBlock();
            }
            this.WriteCompact(len);

            // The byte count is only known after encoding, so skip over its slot now
            // and fill it in afterwards.
            int buffLoc = this.m_curRecordEnd;
            this.m_curRecordEnd += compactSize;
            int numBytes;
            fixed (char* pVal = val)
            {
                numBytes = this.m_encoding.GetBytes(pVal, len,
                                                    this.m_curDataBlock + this.m_curRecordEnd,
                                                    this.m_curBlockSize - this.m_curRecordEnd);
            }
            this.m_curRecordEnd += numBytes;
            this.WriteCompact(numBytes, compactSize, buffLoc);
        }

        /// <summary>
        /// Encodes <paramref name="charCount"/> chars of <paramref name="charBuffer"/>, starting at
        /// <paramref name="offset"/>, and writes the resulting bytes without any length prefix.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="charBuffer"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="offset"/> or <paramref name="charCount"/> is negative, or together they
        /// extend past the end of <paramref name="charBuffer"/>.
        /// </exception>
        public void WriteChars(char[] charBuffer, int offset, int charCount)
        {
            if (charBuffer == null)
            {
                throw new ArgumentNullException("charBuffer");
            }
            if (offset < 0)
            {
                throw new ArgumentOutOfRangeException("offset");
            }
            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException("charCount");
            }
            // Written as a subtraction because offset + charCount can overflow Int32 and wrap
            // negative, which would let an out-of-range request reach the pointer code below.
            if (charBuffer.Length - offset < charCount)
            {
                throw new ArgumentOutOfRangeException("charBuffer",
                                                      String.Format(SR.ArrayLengthVsCountAndOffset,
                                                                    "charBuffer", (long)offset + charCount,
                                                                    "offset", "charCount"));
            }

            Int32 maxByteCount = this.m_encoding.GetMaxByteCount(charCount);

            // if current block doesn't have enough space flush it and allocate a new one
            while (this.m_curBlockSize - this.m_curRecordEnd < maxByteCount)
            {
                this.FlushDataBlock();
            }

            int numBytes;
            fixed (char* pVal = charBuffer)
            {
                numBytes = this.m_encoding.GetBytes(pVal + offset, charCount,
                                                    this.m_curDataBlock + this.m_curRecordEnd,
                                                    this.m_curBlockSize - this.m_curRecordEnd);
            }
            this.m_curRecordEnd += numBytes;
        }

        /// <summary>
        /// Writes <paramref name="byteCount"/> bytes of <paramref name="byteBuffer"/>, starting at
        /// <paramref name="offset"/>, without any length prefix.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="byteBuffer"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="offset"/> or <paramref name="byteCount"/> is negative, or together they
        /// extend past the end of <paramref name="byteBuffer"/>.
        /// </exception>
        public void WriteBytes(byte[] byteBuffer, int offset, int byteCount)
        {
            if (byteBuffer == null)
            {
                throw new ArgumentNullException("byteBuffer");
            }
            if (offset < 0)
            {
                throw new ArgumentOutOfRangeException("offset");
            }
            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException("byteCount");
            }
            // Written as a subtraction because offset + byteCount can overflow Int32 and wrap
            // negative, which would let an out-of-range request reach the memcpy below.
            if (byteBuffer.Length - offset < byteCount)
            {
                throw new ArgumentOutOfRangeException("byteBuffer",
                                                      String.Format(SR.ArrayLengthVsCountAndOffset,
                                                                    "byteBuffer", (long)offset + byteCount,
                                                                    "offset", "byteCount"));
            }

            while (this.m_curBlockSize - this.m_curRecordEnd < byteCount)
            {
                this.FlushDataBlock();
            }
            fixed (byte* pBytes = byteBuffer)
            {
                HpcLinqUtil.memcpy(pBytes + offset, this.m_curDataBlock + this.m_curRecordEnd, byteCount);
            }
            this.m_curRecordEnd += byteCount;
        }

        /// <summary>
        /// Public helper to write from a caller provided byte* to the output stream.
        /// This is mainly used to write preallocated fixed size, non-integer types (Guid, decimal etc).
        /// </summary>
        /// <param name="pBytes">Pointer to the first byte to copy.</param>
        /// <param name="numBytes">Number of bytes to copy.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="numBytes"/> is negative.</exception>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="pBytes"/> is null while <paramref name="numBytes"/> is positive.
        /// </exception>
        public void WriteRawBytes(byte* pBytes, Int32 numBytes)
        {
            // A negative count would skip the space check below and be handed to memcpy as is.
            if (numBytes < 0)
            {
                throw new ArgumentOutOfRangeException("numBytes");
            }
            if (pBytes == null && numBytes > 0)
            {
                throw new ArgumentNullException("pBytes");
            }

            while (this.m_curBlockSize - this.m_curRecordEnd < numBytes)
            {
                this.FlushDataBlock();
            }
            HpcLinqUtil.memcpy(pBytes, this.m_curDataBlock + this.m_curRecordEnd, numBytes);
            this.m_curRecordEnd += numBytes;
        }
    }
}

namespace Microsoft.Research.DryadLinq.Internal
{
    // internal adapter class to make a HpcBinaryWriter work as a Stream
    // this is needed to reuse Stream-based serialization code.
    // The stream is write-only and forward-only: reading, seeking, and setting the
    // position or length all throw a DryadLinqException.
    internal class HpcBinaryWriterToStreamAdapter : Stream
    {
        private HpcBinaryWriter m_dbw;

        internal HpcBinaryWriterToStreamAdapter(HpcBinaryWriter dbw)
        {
            m_dbw = dbw;
        }

        public override bool CanRead
        {
            get { return false; }
        }

        public override bool CanSeek
        {
            get { return false; }
        }

        public override bool CanWrite
        {
            get { return true; }
        }

        public override void Flush()
        {
            m_dbw.Flush();
        }

        // Reports the number of bytes written through the underlying writer so far.
        public override long Length
        {
            get { return m_dbw.Length; }
        }

        // The position is always the end of what has been written; it cannot be set.
        public override long Position
        {
            get
            {
                return m_dbw.Length;
            }
            set
            {
                throw new DryadLinqException(HpcLinqErrorCode.SettingPositionNotSupported,
                                             SR.SettingPositionNotSupported);
            }
        }

        public override int Read(byte[] buffer, int offset, int count)
        {
            throw new DryadLinqException(HpcLinqErrorCode.ReadNotAllowed,
                                         SR.ReadNotAllowed);
        }

        public override long Seek(long offset, SeekOrigin origin)
        {
            throw new DryadLinqException(HpcLinqErrorCode.SeekNotSupported,
                                         SR.SeekNotSupported);
        }

        public override void SetLength(long value)
        {
            throw new DryadLinqException(HpcLinqErrorCode.SetLengthNotSupported,
                                         SR.SetLengthNotSupported);
        }

        public override void Write(byte[] buffer, int offset, int count)
        {
            m_dbw.WriteBytes(buffer, offset, count);
        }

        public override void WriteByte(byte value)
        {
            m_dbw.Write(value);
        }

        // Closes the underlying writer, then disposes the base Stream even if that close throws.
        public override void Close()
        {
            try
            {
                m_dbw.Close();
            }
            finally
            {
                base.Dispose(true);
            }
        }
    }
}