/* Copyright (c) Microsoft Corporation All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. See the Apache Version 2.0 License for specific language governing permissions and limitations under the License. */ #pragma once #include #include #include #include /* Structured data elements passed on restartable channels are called Items. Every item is derived from the RChannelItem base class and has a type, a sequence number and a metadata description. The type is assigned by the creator of the item, and is discussed below. The sequence number is only used for items being read from a channel and is assigned automatically by the channel reader mechanism. The metadata is initially populated by the item creator and may be initialized from the underlying data stream on a read channel, but may be added to or modified subsequently during processing. Items fall into two fundamental classes: data items and marker items. Data items have type RChannelItem_Data and carry application-specific data. A data item which is read from a channel is guaranteed to have been created by an application-specific parser object which consumes byte-oriented buffers, and may be cast by the application into a rich type. The sequence number of a data item is automatically assigned by the channel reader machinery, and data items read from a channel have unique, densely assigned, increasing sequence numbers starting at zero. 
All non-data item types are known as marker items and are used to represent and serialize information such as error conditions and end-of-stream information. Errors can include information about "holes" in the underlying data stream (caused, e.g., when a subset of the input data is unavailable) and parse failures. Marker items may be produced and consumed by a variety of components, and information is passed between these components using the metadata. There are convenience methods below for creating standard error marker items. The sequence number of a marker item is automatically assigned by the channel reader machinery, and it is equal to the sequence number of the next data item (or, equivalently, one greater than the sequence number of the preceding data item). On write, all items are passed to an application-specific marshaling object which knows how to serialize both marker items and data items with a particular format into byte-oriented buffers. Not all channels store or transmit marker items, so it is legal for the marshaler to do nothing when presented with a marker item, but in general it is expected that applications will make use of marker items to convey rich monitoring and debugging information on internal channels between the vertices of a distributed computation. A valid complete channel consists of a sequence of "body" items followed by a single "termination" item. A channel reader will always produce such a sequence unless interrupted by the application calling Drain prematurely, and a channel writer should be fed such a sequence. Termination items have type EndOfStream, Abort, Restart, ParseError or MarshalError: all other items are body items and may appear in any sequence in a valid channel. No item may appear in a channel after a termination item. The item constructors are non-public (protected) to allow applications to use private memory management. 
Each item supplies a Create method and is reference-counted: a reference is released by calling DecRef on the item. */ enum RChannelItemType { /* RChannelItem_Data: a data item generated by application specific code in the parser or the application body. The application may cast this item to a rich derived type. */ RChannelItem_Data, /* RChannelItem_BufferHole: a marker item supplied by the buffer reader describing a hole in the underlying data stream. */ RChannelItem_BufferHole, /* RChannelItem_ItemHole: a marker item supplied by the parser describing a hole in the parsed data stream (usually occurring because of malformed input data which the parser has skipped over). */ RChannelItem_ItemHole, /* RChannelItem_EndOfStream: a marker item appearing as the last item in the stream and signaling clean completion. */ RChannelItem_EndOfStream, /* RChannelItem_Restart: a marker item appearing as the last item in the stream signaling an error condition and requesting that the channel be restarted if possible. */ RChannelItem_Restart, /* RChannelItem_Abort: a marker item appearing as the last item in the stream signaling an unrecoverable error condition. */ RChannelItem_Abort, /* RChannelItem_ParseError: a marker item supplied by the parser signifying an unrecoverable error. */ RChannelItem_ParseError, /* RChannelItem_MarshalError: a marker item supplied by the parser signifying an unrecoverable error. */ RChannelItem_MarshalError }; class RChannelItem; typedef DrRef RChannelItemRef; class DrResettableMemoryReader : public DrMemoryBufferReader { public: DrResettableMemoryReader(DrMemoryBuffer *pMemoryBuffer); void ResetToBufferOffset(Size_t offset); }; class RChannelItem : public DrRefCounter { public: /* returns true if the item can be used as a channel termination marker, false otherwise */ static bool IsTerminationItem(RChannelItemType type); /* return the type of the item, set at creation time. 
*/ RChannelItemType GetType(); /* The sequence number of the item in the Channel with respect to all delivered Data items. This sequence number is generated locally, and so if a Data item appears after any holes in the stream it may not correspond to the sequence number for this item when read a second time, or from another reader. If the item type is not RChannelItem_Data this number is the sequence number of the *next* Data item, if any. Data items are assigned dense, increasing sequence numbers. */ UInt64 GetDataSequenceNumber(); /* The sequence number of the item in the Channel with respect to all delivered items. This sequence number is generated locally, and so if there are any markers or holes in the stream it may not correspond to the sequence number for this item when read a second time, or from another reader. Items are assigned dense, increasing sequence numbers. */ UInt64 GetDeliverySequenceNumber(); /* this returns a description of the item, used for debugging and monitoring purposes. When the item is created the metadata is NULL. */ DryadMetaData* GetMetaData(); /* these set the item's sequence numbers and are called by the channel reader */ void SetDataSequenceNumber(UInt64 dataSequenceNumber); void SetDeliverySequenceNumber(UInt64 deliverySequenceNumber); /* this replaces the item's current metadata with a new object */ void ReplaceMetaData(DryadMetaData* metaData); /* this does a shallow copy. By default this assert-fails, and concrete Data items which need to be cloned must implement an item-specific method. 
*/ virtual void Clone(RChannelItemRef* pClonedItem); virtual UInt64 GetNumberOfSubItems() const; virtual void TruncateSubItems(UInt64 numberOfSubItems); virtual UInt64 GetItemSize() const; virtual DrError DeSerialize(DrResettableMemoryReader* reader, Size_t availableSize); virtual DrError DeSerializePartial(DrResettableMemoryReader* reader, Size_t availableSize); virtual DrError Serialize(ChannelMemoryBufferWriter* writer); /* if the item has metadata which contains a Prop_Dryad_ErrorCode then this is returned. Otherwise the error depends on the type: RChannelItem_Data: DrError_OK RChannelItem_BufferHole: DryadError_BufferHole RChannelItem_ItemHole: DryadError_ItemHole RChannelItem_EndOfStream: DrError_EndOfStream RChannelItem_Restart: DryadError_ChannelRestart RChannelItem_Abort: DryadError_ChannelAbort RChannelItem_ParseError: DryadError_ItemParseError RChannelItem_MarshalError: DryadError_ItemMarshalError */ DrError GetErrorFromItem(); static const UInt64 s_invalidSequenceNumber = ((UInt64) -1); static const UInt32 s_defaultItemBatchSize = 16; static const UInt32 s_defaultRecordBatchSize = 256; protected: RChannelItem(RChannelItemType type); virtual ~RChannelItem(); private: RChannelItemType m_type; UInt64 m_dataSequenceNumber; UInt64 m_deliverySequenceNumber; DryadMetaDataRef m_metaData; DrBListEntry m_listPtr; friend class DryadBList; }; typedef DryadBList RChannelItemList; class RChannelMarkerItem : public RChannelItem { public: /* used to create a standard marker item. If withMetaData is true, an empty metadata object is created with the item, otherwise the item's metadata is left NULL. */ static RChannelMarkerItem* Create(RChannelItemType type, bool withMetaData); /* this does a shallow copy, which shares the same metadata as the original (with an increased refcount). 
*/ virtual void Clone(RChannelItemRef* pClonedItem); /* these convenience methods create marker items with common metadata fields already filled in */ /* fills in the metadata element Prop_Dryad_ErrorCode with the supplied information */ static RChannelItem* CreateErrorItem(RChannelItemType itemType, DrError errorCode); /* fills in the metadata elements Prop_Dryad_ErrorCode and Prop_Dryad_ErrorString with the supplied information. */ static RChannelItem* CreateErrorItemWithDescription(RChannelItemType itemType, DrError errorCode, const char* errorDescription); protected: RChannelMarkerItem(RChannelItemType type); virtual ~RChannelMarkerItem(); }; class RChannelDataItem : public RChannelItem { public: virtual UInt64 GetNumberOfSubItems() const; virtual UInt64 GetItemSize() const; protected: RChannelDataItem(); virtual ~RChannelDataItem(); }; class RChannelItemArray : public DrRefCounter { public: RChannelItemArray(); ~RChannelItemArray(); void SetNumberOfItems(UInt32 numberOfItems); void ExtendNumberOfItems(UInt32 numberOfItems); UInt32 GetNumberOfItems(); void TruncateToSize(UInt32 prefix); void DiscardPrefix(UInt32 prefix); RChannelItemRef* GetItemArray(); private: UInt32 m_numberOfItems; RChannelItemRef* m_baseItemArray; RChannelItemRef* m_itemArray; }; typedef DrRef RChannelItemArrayRef;