/*
Copyright (c) Microsoft Corporation

All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License
at http://www.apache.org/licenses/LICENSE-2.0


THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF
TITLE, FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.


See the Apache Version 2.0 License for specific language governing permissions and
limitations under the License.

*/
|
|
|
|
#pragma once
|
|
|
|
#include <DrCommon.h>
|
|
#include <dryadlisthelper.h>
|
|
#include <dryadmetadata.h>
|
|
#include <channelmemorybuffers.h>
|
|
|
|
/*
  Structured data elements passed on restartable channels are called
  Items.

  Every item is derived from the RChannelItem base class and has a
  type, a sequence number and a metadata description. The type is
  assigned by the creator of the item, and is discussed below. The
  sequence number is only used for items being read from a channel and
  is assigned automatically by the channel reader mechanism. The
  metadata is initially populated by the item creator and may be
  initialized from the underlying data stream on a read channel, but
  may be added to or modified subsequently during processing.

  Items fall into two fundamental classes: data items and marker
  items. Data items have type RChannelItem_Data and carry
  application-specific data. A data item which is read from a channel
  is guaranteed to have been created by an application-specific parser
  object which consumes byte-oriented buffers, and may be cast by the
  application into a rich type. The sequence number of a data item is
  automatically assigned by the channel reader machinery, and data
  items read from a channel have unique, densely assigned, increasing
  sequence numbers starting at zero.

  All non-data item types are known as marker items and are used to
  represent and serialize information such as error conditions and
  end-of-stream information. Errors can include information about
  "holes" in the underlying data stream (caused e.g. when a subset of
  the input data is unavailable) and parse failures. Marker items may
  be produced and consumed by a variety of components, and information
  is passed between these components using the metadata. There are
  convenience methods below for creating standard error marker
  items. The sequence number of a marker item is automatically
  assigned by the channel reader machinery, and it is equal to the
  sequence number of the next data item (or, equivalently, one greater
  than the sequence number of the preceding data item).

  On write, all items are passed to an application-specific marshaling
  object which knows how to serialize both marker items and data items
  with a particular format into byte-oriented buffers. Not all
  channels store or transmit marker items, so it is legal for the
  marshaler to do nothing when presented with a marker item, but in
  general it is expected that applications will make use of marker
  items to convey rich monitoring and debugging information on
  internal channels between the vertices of a distributed computation.

  A valid complete channel consists of a sequence of "body" items
  followed by a single "termination" item. A channel reader will
  always produce such a sequence unless interrupted by the application
  calling Drain prematurely, and a channel writer should be fed such a
  sequence. Termination items have type EndOfStream, Abort, Restart,
  ParseError or MarshalError: all other items are body items and may
  appear in any sequence in a valid channel. No item may appear in a
  channel after a termination item.

  The item constructors are not public (they are declared protected)
  to allow applications to use private memory management. Each item
  supplies a Create method and is reference-counted: a reference is
  released by calling DecRef on the item.
*/
|
|
|
|
/* The kind of an item travelling on a restartable channel.
   RChannelItem_Data is the only data type; every other value denotes
   a marker item. Enumerator order (and therefore the implicit numeric
   values 0..7) is part of the interface and must not be changed. */
enum RChannelItemType {
    /* RChannelItem_Data: a data item generated by application-specific
       code in the parser or the application body. The application may
       cast this item to a rich derived type. */
    RChannelItem_Data,

    /* RChannelItem_BufferHole: a marker item supplied by the buffer
       reader describing a hole in the underlying data stream. */
    RChannelItem_BufferHole,

    /* RChannelItem_ItemHole: a marker item supplied by the parser
       describing a hole in the parsed data stream (usually occurring
       because of malformed input data which the parser has skipped
       over). */
    RChannelItem_ItemHole,

    /* RChannelItem_EndOfStream: a marker item appearing as the last
       item in the stream and signaling clean completion. */
    RChannelItem_EndOfStream,

    /* RChannelItem_Restart: a marker item appearing as the last item
       in the stream, signaling an error condition and requesting that
       the channel be restarted if possible. */
    RChannelItem_Restart,

    /* RChannelItem_Abort: a marker item appearing as the last item in
       the stream, signaling an unrecoverable error condition. */
    RChannelItem_Abort,

    /* RChannelItem_ParseError: a marker item supplied by the parser
       signifying an unrecoverable parse error. */
    RChannelItem_ParseError,

    /* RChannelItem_MarshalError: a marker item supplied by the
       marshaler signifying an unrecoverable marshaling error. */
    RChannelItem_MarshalError
};
|
|
|
|
class RChannelItem;
|
|
typedef DrRef<RChannelItem> RChannelItemRef;
|
|
|
|
class DrResettableMemoryReader : public DrMemoryBufferReader
|
|
{
|
|
public:
|
|
DrResettableMemoryReader(DrMemoryBuffer *pMemoryBuffer);
|
|
void ResetToBufferOffset(Size_t offset);
|
|
};
|
|
|
|
class RChannelItem : public DrRefCounter
|
|
{
|
|
public:
|
|
/* returns true if the item can be used as a channel termination
|
|
marker, false otherwise */
|
|
static bool IsTerminationItem(RChannelItemType type);
|
|
|
|
/* return the type of the item, set at creation time. */
|
|
RChannelItemType GetType();
|
|
|
|
/* The sequence number of the item in the Channel with respect to
|
|
all delivered Data items. This sequence number is generated
|
|
locally, and so if a Data item appears after any holes in the
|
|
stream it may not correspond to the sequence number for this
|
|
item when read a second time, or from another reader.
|
|
|
|
If the item type is not RChannelItem_Data this number is the
|
|
sequence number of the *next* Data item, if any. Data items are
|
|
assigned dense, increasing sequence numbers.
|
|
*/
|
|
UInt64 GetDataSequenceNumber();
|
|
|
|
/* The sequence number of the item in the Channel with respect to
|
|
all delivered items. This sequence number is generated locally,
|
|
and so if there are any markers or holes in the stream it may
|
|
not correspond to the sequence number for this item when read a
|
|
second time, or from another reader. Items are assigned dense,
|
|
increasing sequence numbers.
|
|
*/
|
|
UInt64 GetDeliverySequenceNumber();
|
|
|
|
/* this returns a description of the item, used for debugging and
|
|
monitoring purposes. When the item is created the metadata is
|
|
NULL.
|
|
*/
|
|
DryadMetaData* GetMetaData();
|
|
|
|
/* these set the item's sequence numbers and are called by the
|
|
channel reader */
|
|
void SetDataSequenceNumber(UInt64 dataSequenceNumber);
|
|
void SetDeliverySequenceNumber(UInt64 deliverySequenceNumber);
|
|
|
|
/* this replaces the item's current metadata with a new object */
|
|
void ReplaceMetaData(DryadMetaData* metaData);
|
|
|
|
/* this does a shallow copy. By default this assert-fails, and
|
|
concrete Data items which need to be cloned must implement
|
|
an item-specific method. */
|
|
virtual void Clone(RChannelItemRef* pClonedItem);
|
|
|
|
virtual UInt64 GetNumberOfSubItems() const;
|
|
virtual void TruncateSubItems(UInt64 numberOfSubItems);
|
|
virtual UInt64 GetItemSize() const;
|
|
|
|
virtual DrError DeSerialize(DrResettableMemoryReader* reader,
|
|
Size_t availableSize);
|
|
virtual DrError DeSerializePartial(DrResettableMemoryReader* reader,
|
|
Size_t availableSize);
|
|
virtual DrError Serialize(ChannelMemoryBufferWriter* writer);
|
|
|
|
/* if the item has metadata which contains a Prop_Dryad_ErrorCode
|
|
then this is returned. Otherwise the error depends on the type:
|
|
RChannelItem_Data: DrError_OK
|
|
RChannelItem_BufferHole: DryadError_BufferHole
|
|
RChannelItem_ItemHole: DryadError_ItemHole
|
|
RChannelItem_EndOfStream: DrError_EndOfStream
|
|
RChannelItem_Restart: DryadError_ChannelRestart
|
|
RChannelItem_Abort: DryadError_ChannelAbort
|
|
RChannelItem_ParseError: DryadError_ItemParseError
|
|
RChannelItem_MarshalError: DryadError_ItemMarshalError
|
|
*/
|
|
DrError GetErrorFromItem();
|
|
|
|
static const UInt64 s_invalidSequenceNumber = ((UInt64) -1);
|
|
|
|
static const UInt32 s_defaultItemBatchSize = 16;
|
|
static const UInt32 s_defaultRecordBatchSize = 256;
|
|
|
|
protected:
|
|
RChannelItem(RChannelItemType type);
|
|
virtual ~RChannelItem();
|
|
|
|
private:
|
|
RChannelItemType m_type;
|
|
UInt64 m_dataSequenceNumber;
|
|
UInt64 m_deliverySequenceNumber;
|
|
DryadMetaDataRef m_metaData;
|
|
DrBListEntry m_listPtr;
|
|
friend class DryadBList<RChannelItem>;
|
|
};
|
|
|
|
/* List of RChannelItem objects. DryadBList<RChannelItem> is declared a
   friend of RChannelItem.
   NOTE(review): presumably an intrusive list linked through
   RChannelItem::m_listPtr -- confirm against DryadBList. */
typedef DryadBList<RChannelItem> RChannelItemList;
|
|
|
|
class RChannelMarkerItem : public RChannelItem
|
|
{
|
|
public:
|
|
/* used to create a standard marker item. If withMetaData is true,
|
|
an empty metadata object is created with the item, otherwise
|
|
the item's metadata is left NULL. */
|
|
static RChannelMarkerItem*
|
|
Create(RChannelItemType type, bool withMetaData);
|
|
|
|
/* this does a shallow copy, which shares the same metadata as the
|
|
original (with an increased refcount). */
|
|
virtual void Clone(RChannelItemRef* pClonedItem);
|
|
|
|
|
|
/* these convenience methods create marker items with common
|
|
metadata fields already filled in */
|
|
|
|
/* fills in the metadata element Prop_Dryad_ErrorCode with the
|
|
supplied information */
|
|
static RChannelItem* CreateErrorItem(RChannelItemType itemType,
|
|
DrError errorCode);
|
|
/* fills in the metadata elements Prop_Dryad_ErrorCode and
|
|
Prop_Dryad_ErrorString with the supplied information. */
|
|
static RChannelItem*
|
|
CreateErrorItemWithDescription(RChannelItemType itemType,
|
|
DrError errorCode,
|
|
const char* errorDescription);
|
|
|
|
protected:
|
|
RChannelMarkerItem(RChannelItemType type);
|
|
virtual ~RChannelMarkerItem();
|
|
};
|
|
|
|
class RChannelDataItem : public RChannelItem
|
|
{
|
|
public:
|
|
virtual UInt64 GetNumberOfSubItems() const;
|
|
virtual UInt64 GetItemSize() const;
|
|
|
|
protected:
|
|
RChannelDataItem();
|
|
virtual ~RChannelDataItem();
|
|
};
|
|
|
|
class RChannelItemArray : public DrRefCounter
|
|
{
|
|
public:
|
|
RChannelItemArray();
|
|
~RChannelItemArray();
|
|
|
|
void SetNumberOfItems(UInt32 numberOfItems);
|
|
void ExtendNumberOfItems(UInt32 numberOfItems);
|
|
UInt32 GetNumberOfItems();
|
|
|
|
void TruncateToSize(UInt32 prefix);
|
|
void DiscardPrefix(UInt32 prefix);
|
|
|
|
RChannelItemRef* GetItemArray();
|
|
|
|
private:
|
|
UInt32 m_numberOfItems;
|
|
RChannelItemRef* m_baseItemArray;
|
|
RChannelItemRef* m_itemArray;
|
|
};
|
|
|
|
/* DrRef instantiated for RChannelItemArray (which derives from
   DrRefCounter). */
typedef DrRef<RChannelItemArray> RChannelItemArrayRef;
|