Documentation
¶
Index ¶
- func DecodeDictChunk(chunk *Chunk)
- func ReadDataPageValues(bytesReader *bytes.Reader, encodingMethod parquet.Encoding, ...) ([]interface{}, error)
- func ReadPageHeader(thriftReader *thrift.TBufferedTransport) (*parquet.PageHeader, error)
- type Chunk
- type DictRecType
- type Page
- func DictRecToDictPage(dictRec *DictRecType, pageSize int32, compressType parquet.CompressionCodec) (*Page, int64)
- func NewDataPage() *Page
- func NewDictPage() *Page
- func NewPage() *Page
- func ReadPage(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, ...) (*Page, int64, int64, error)
- func ReadPage2(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, ...) (*Page, int64, int64, error)
- func ReadPageRawData(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, ...) (*Page, error)
- func TableToDataPages(table *Table, pageSize int32, compressType parquet.CompressionCodec) ([]*Page, int64)
- func TableToDictDataPages(dictRec *DictRecType, table *Table, pageSize int32, bitWidth int32, ...) ([]*Page, int64)
- func (page *Page) DataPageCompress(compressType parquet.CompressionCodec) []byte
- func (page *Page) DataPageV2Compress(compressType parquet.CompressionCodec) []byte
- func (page *Page) Decode(dictPage *Page)
- func (page *Page) DictDataPageCompress(compressType parquet.CompressionCodec, bitWidth int32, values []int32) []byte
- func (page *Page) DictPageCompress(compressType parquet.CompressionCodec, pT parquet.Type) []byte
- func (page *Page) EncodingValues(valuesBuf []interface{}) []byte
- func (p *Page) GetRLDLFromRawData(schemaHandler *schema.SchemaHandler) (int64, int64, error)
- func (p *Page) GetValueFromRawData(schemaHandler *schema.SchemaHandler) error
- type RowGroup
- type Table
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ReadDataPageValues ¶
func ReadDataPageValues(bytesReader *bytes.Reader, encodingMethod parquet.Encoding, dataType parquet.Type, convertedType parquet.ConvertedType, cnt uint64, bitWidth uint64) ([]interface{}, error)
Read data page values
func ReadPageHeader ¶
func ReadPageHeader(thriftReader *thrift.TBufferedTransport) (*parquet.PageHeader, error)
Read page header
Types ¶
type Chunk ¶
// Chunk groups the pages of one column chunk together with its header.
type Chunk struct {
// Pages are the pages that make up this chunk.
// NOTE(review): presumably the dict page, when present, comes first — confirm against PagesToDictChunk.
Pages []*Page
// ChunkHeader is the parquet ColumnChunk structure describing this chunk.
ChunkHeader *parquet.ColumnChunk
}
Chunk stores a ColumnChunk of a parquet file
func PagesToDictChunk ¶
Convert several pages to one chunk with dict page first
func ReadChunk ¶
func ReadChunk(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, chunkHeader *parquet.ColumnChunk) (*Chunk, error)
Read one chunk from parquet file (Deprecated)
type DictRecType ¶
func NewDictRec ¶
func NewDictRec(pT parquet.Type) *DictRecType
type Page ¶
// Page holds the data of a single parquet page: its header, its values in
// table form, and its raw (compressed) bytes.
type Page struct {
// Header of the page
Header *parquet.PageHeader
// Table that stores the page's values
DataTable *Table
// Compressed data of the page, which is written to the parquet file
RawData []byte
// Compression type: gzip/snappy/zstd/none
CompressType parquet.CompressionCodec
// Schema element of the column this page belongs to
Schema *parquet.SchemaElement
// Path in the schema (including the root)
Path []string
// Maximum of the values
MaxVal interface{}
// Minimum of the values
MinVal interface{}
// Null count of the page.
// NOTE(review): pointer type — presumably nil means "not computed"; confirm against the writers.
NullCount *int64
// Tag info
Info *common.Tag
// PageSize of this page.
// NOTE(review): presumably the configured page size in bytes passed to the page builders — confirm.
PageSize int32
}
Page is used to store the page data
func DictRecToDictPage ¶
func DictRecToDictPage(dictRec *DictRecType, pageSize int32, compressType parquet.CompressionCodec) (*Page, int64)
func ReadPage ¶
func ReadPage(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, colMetaData *parquet.ColumnMetaData) (*Page, int64, int64, error)
Read page from parquet file
func ReadPage2 ¶
func ReadPage2(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, colMetaData *parquet.ColumnMetaData) (*Page, int64, int64, error)
This is a test function
func ReadPageRawData ¶
func ReadPageRawData(thriftReader *thrift.TBufferedTransport, schemaHandler *schema.SchemaHandler, colMetaData *parquet.ColumnMetaData) (*Page, error)
Read page RawData
func TableToDataPages ¶
func TableToDataPages(table *Table, pageSize int32, compressType parquet.CompressionCodec) ([]*Page, int64)
Convert a table to data pages
func TableToDictDataPages ¶
func TableToDictDataPages(dictRec *DictRecType, table *Table, pageSize int32, bitWidth int32, compressType parquet.CompressionCodec) ([]*Page, int64)
Convert a table to dict data pages
func (*Page) DataPageCompress ¶
func (page *Page) DataPageCompress(compressType parquet.CompressionCodec) []byte
Compress the data page to parquet file
func (*Page) DataPageV2Compress ¶
func (page *Page) DataPageV2Compress(compressType parquet.CompressionCodec) []byte
Compress data page v2 to parquet file
func (*Page) DictDataPageCompress ¶
func (page *Page) DictDataPageCompress(compressType parquet.CompressionCodec, bitWidth int32, values []int32) []byte
Compress the dict data page to parquet file
func (*Page) DictPageCompress ¶
Compress the dict page to parquet file
func (*Page) EncodingValues ¶
Encoding values
func (*Page) GetRLDLFromRawData ¶
Get RepetitionLevels and DefinitionLevels from RawData
func (*Page) GetValueFromRawData ¶
func (p *Page) GetValueFromRawData(schemaHandler *schema.SchemaHandler) error
Get values from raw data
type RowGroup ¶
RowGroup stores the RowGroup in parquet file
func ReadRowGroup ¶
func ReadRowGroup(rowGroupHeader *parquet.RowGroup, PFile source.ParquetFile, schemaHandler *schema.SchemaHandler, NP int64) (*RowGroup, error)
Read one RowGroup from parquet file (Deprecated)
func (*RowGroup) RowGroupToTableMap ¶
Convert a RowGroup to table map
type Table ¶
// Table is the core data structure used to store the values of one column,
// together with their definition and repetition levels.
type Table struct {
// Repetition type of the values: REQUIRED/OPTIONAL/REPEATED
RepetitionType parquet.FieldRepetitionType
// Schema element of this column
Schema *parquet.SchemaElement
// Path of this column
Path []string
// Maximum of the definition levels
MaxDefinitionLevel int32
// Maximum of the repetition levels
MaxRepetitionLevel int32
// Parquet values
Values []interface{}
// Definition levels slice.
// NOTE(review): presumably index-aligned with Values — confirm.
DefinitionLevels []int32
// Repetition levels slice.
// NOTE(review): presumably index-aligned with Values — confirm.
RepetitionLevels []int32
// Tag info
Info *common.Tag
}
Table is the core data structure used to store the values
func NewEmptyTable ¶
func NewEmptyTable() *Table