From c399dfd15682a69a787833acd68930892f66fe88 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Sun, 30 Aug 2015 01:21:19 -0300
Subject: [PATCH 16/24] First column store implementation, colstore_dummy

This is a very simple module implementing the column store API.

Implementation notes:

- New subdir src/backend/colstore/

- The whole module is contained within a single file, colstore_dummy.c.
  We might want to create subdirs so that larger modules (>1 file) don't
  have their files mixed with other modules.
---
 src/backend/Makefile                  |   2 +-
 src/backend/colstore/Makefile         |  17 ++
 src/backend/colstore/colstore_dummy.c | 326 ++++++++++++++++++++++++++++++++++
 src/include/catalog/pg_proc.h         |   4 +
 src/include/colstore/colstore_dummy.h | 161 +++++++++++++++++
 5 files changed, 509 insertions(+), 1 deletion(-)
 create mode 100644 src/backend/colstore/Makefile
 create mode 100644 src/backend/colstore/colstore_dummy.c
 create mode 100644 src/include/colstore/colstore_dummy.h

diff --git a/src/backend/Makefile b/src/backend/Makefile
index 98b978f..66adc59 100644
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global
 
 SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \
 	main nodes optimizer port postmaster regex replication rewrite \
-	storage tcop tsearch utils $(top_builddir)/src/timezone
+	colstore storage tcop tsearch utils $(top_builddir)/src/timezone
 
 include $(srcdir)/common.mk
 
diff --git a/src/backend/colstore/Makefile b/src/backend/colstore/Makefile
new file mode 100644
index 0000000..9f2fbd4
--- /dev/null
+++ b/src/backend/colstore/Makefile
@@ -0,0 +1,17 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for colstore (column store modules built into the backend)
+#
+# IDENTIFICATION
+#    src/backend/colstore/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/colstore
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+# object files of this subdirectory
+OBJS = colstore_dummy.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/colstore/colstore_dummy.c b/src/backend/colstore/colstore_dummy.c
new file mode 100644
index 0000000..03238e0
--- /dev/null
+++ b/src/backend/colstore/colstore_dummy.c
@@ -0,0 +1,326 @@
+/*------------------------------------------------------------------------
+ * colstore_dummy.c
+ * 		Simple column store implementation for POSTGRES
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * src/backend/colstore/colstore_dummy.c
+ *
+ *------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "colstore/colstoreapi.h"
+#include "colstore/colstore_dummy.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/smgr.h"
+#include "utils/rel.h"
+
+PG_FUNCTION_INFO_V1(cstore_dummy_handler);
+
+static void cstore_dummy_insert(Relation rel,
+				Relation colstorerel, ColumnStoreInfo *info,
+				int natts, Datum *values, bool *nulls,
+				ItemPointer tupleid);
+static void cstore_dummy_batch_insert(Relation rel,
+				Relation colstorerel, ColumnStoreInfo *info,
+				int nrows, int natts, Datum **values, bool **nulls,
+				ItemPointer *tupleids);
+static Buffer get_colstore_buffer(Relation rel, Relation colstore);
+static int	ColumnarPageGetFreeItems(ColumnarPage page);
+
+
+/* Handler function: returns this module's ColumnStoreRoutine callbacks. */
+Datum
+cstore_dummy_handler(PG_FUNCTION_ARGS)
+{
+	ColumnStoreRoutine *cstore = makeNode(ColumnStoreRoutine);
+
+	cstore->ExecColumnStoreInsert = cstore_dummy_insert;
+	cstore->ExecColumnStoreBatchInsert = cstore_dummy_batch_insert;
+	PG_RETURN_POINTER(cstore);
+}
+
+/*
+ * cstore_dummy_insert
+ *		Append one row to the last page of the column store.
+ *
+ * values/nulls are indexed by column and tupleid is the heap TID of the row;
+ * info and natts are unused.  A set NULL-bitmap bit marks a NULL value.
+ */
+static void
+cstore_dummy_insert(Relation rel,
+					Relation colstorerel, ColumnStoreInfo *info,
+					int natts, Datum *values, bool *nulls,
+					ItemPointer tupleid)
+{
+	int					i;
+	/* the buffer comes back pinned and exclusively locked */
+	Buffer				buffer = get_colstore_buffer(rel, colstorerel);
+	ColumnarPage		page = BufferGetColumnarPage(buffer);
+	ColumnarPageHeader	header = (ColumnarPageHeader) page;
+
+	Assert(ColumnarPageGetFreeItems(page) > 0);
+
+	for (i = 0; i < header->pd_ncolumns; i++)
+	{
+		int		byteIdx = header->pd_nitems / 8;
+		int		bitIdx = header->pd_nitems % 8;
+
+		/* copies the bytes of the Datum itself; FIXME by-reference types */
+		memcpy(PageGetColumnDataNext(page, i),
+			   &values[i], PageGetColumnAttlen(page, i));
+		PageGetColumnDataAddBytes(page, i, PageGetColumnAttlen(page, i));
+
+		/* OR the bit in so earlier rows' bits in the same byte survive */
+		if (nulls[i])
+			*(PageGetColumnNulls(page, i) + byteIdx) |= (0x01 << bitIdx);
+		PageGetColumnNullsSetBytes(page, i, (byteIdx + 1));
+	}
+
+	/* record the heap TID, then count the new row */
+	memcpy(PageGetNextTupleId(page), tupleid, sizeof(ItemPointerData));
+	header->pd_nitems += 1;		/* FIXME update min/max TID */
+	Assert(header->pd_nitems <= header->pd_maxitems);
+
+	MarkBufferDirty(buffer);	/* only new pages are dirtied elsewhere */
+	PageSetChecksumInplace((Page) page, BufferGetBlockNumber(buffer));
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+	ReleaseBuffer(buffer);
+}
+
+/*
+ * cstore_dummy_batch_insert
+ *		Append nrows rows, one page's worth at a time.  values[col][row] and
+ *		nulls[col][row] hold the data, tupleids[row] the heap TIDs; info and
+ *		natts are unused.  A set NULL-bitmap bit marks a NULL value.
+ */
+static void
+cstore_dummy_batch_insert(Relation rel,
+				Relation colstorerel, ColumnStoreInfo *info,
+				int nrows, int natts, Datum **values, bool **nulls,
+				ItemPointer *tupleids)
+{
+	int		i,
+			j;
+	int		first = 0;		/* index of the first row not stored yet */
+
+	while (first < nrows)
+	{
+		Buffer 			buffer = get_colstore_buffer(rel, colstorerel);
+		ColumnarPage 	page = BufferGetColumnarPage(buffer);
+		ColumnarPageHeader header = (ColumnarPageHeader) page;
+		int				nitems = ColumnarPageGetFreeItems(page);
+
+		Assert(nitems > 0);
+		/* store as many of the remaining rows as this page has slots for */
+		nitems = (nitems < (nrows - first)) ? nitems : (nrows - first);
+
+		for (i = 0; i < header->pd_ncolumns; i++)
+		{
+			for (j = 0; j < nitems; j++)
+			{
+				int		byteIdx = (header->pd_nitems + j) / 8;
+				int		bitIdx = (header->pd_nitems + j) % 8;
+
+				/* copies the Datum's own bytes; FIXME by-reference types */
+				memcpy(PageGetColumnDataNext(page, i),
+					   &values[i][first + j], PageGetColumnAttlen(page, i));
+				PageGetColumnDataAddBytes(page, i, PageGetColumnAttlen(page, i));
+
+				/* OR the bit in so earlier rows' bits in the byte survive */
+				if (nulls[i][first + j])
+					*(PageGetColumnNulls(page, i) + byteIdx) |= (0x01 << bitIdx);
+				PageGetColumnNullsSetBytes(page, i, (byteIdx + 1));
+			}
+		}
+
+		/* PageGetNextTupleId() is an ItemPointer, so "+ i" steps one TID */
+		for (i = 0; i < nitems; i++)
+			memcpy(PageGetNextTupleId(page) + i,
+				   tupleids[first + i], sizeof(ItemPointerData));
+
+		header->pd_nitems += nitems;	/* FIXME update min/max TID */
+		first += nitems;
+		Assert(header->pd_nitems <= header->pd_maxitems);
+
+		MarkBufferDirty(buffer);	/* only new pages are dirtied elsewhere */
+		PageSetChecksumInplace((Page) page, BufferGetBlockNumber(buffer));
+		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+		ReleaseBuffer(buffer);
+	}
+}
+
+/*
+ * ColumnarPageInit
+ *		Format 'page' (pageSize bytes) as an empty columnar page for 'rel'.
+ *		Layout: header, column info array, then per column a data area and
+ *		a NULL bitmap, then the tuple ID array, sized for pd_maxitems rows.
+ *		Errors out for variable-length columns or if nothing would fit.
+ */
+void
+ColumnarPageInit(ColumnarPage page, Size pageSize, Relation rel)
+{
+	int 				i;
+	ColumnarPageHeader	header;
+	TupleDesc			tupdesc;
+	Size				hdrsize;	/* fixed header plus column info array */
+	Size				itemsize;
+	Size				freespace;
+	int					maxtuples = 0;
+	int					natts;
+	int					nnulls = 0;
+
+	tupdesc = RelationGetDescr(rel);
+	natts   = tupdesc->natts;
+
+	/* refuse to write column info entries past the end of the page */
+	hdrsize = offsetof(ColumnarPageHeaderData, pd_columns)
+				+ natts * sizeof(ColumnInfoData);
+	if (hdrsize >= pageSize)
+		elog(ERROR, "too many columns for a column store page");
+
+	/* zero the page first */
+	memset(page, 0, pageSize);
+
+	header = (ColumnarPageHeader)page;
+	header->pd_ncolumns = natts;
+	header->pd_flags = 0;
+
+	/*
+	 * We don't use special space, so pd_special is pageSize.  pd_lower and
+	 * pd_upper both point right after the column info array, so the page
+	 * seems to be entirely full (pd_upper-pd_lower==0).
+	 * XXX An alternative might be to store the column info structs in the
+	 *     special section, not sure if that's better.
+	 */
+	header->pd_lower = hdrsize;
+	header->pd_upper = header->pd_lower;
+	header->pd_special = pageSize;	/* no special */
+
+	PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
+
+	/* a 'row' costs one item pointer plus one value per column */
+	itemsize = sizeof(ItemPointerData);
+	for (i = 0; i < natts; i++)
+	{
+		if (tupdesc->attrs[i]->attlen < 0)
+			elog(ERROR, "variable-length data types not supported yet");
+
+		itemsize += tupdesc->attrs[i]->attlen;
+
+		header->pd_columns[i].attnum     = tupdesc->attrs[i]->attnum;
+		header->pd_columns[i].attlen     = tupdesc->attrs[i]->attlen;
+		header->pd_columns[i].atttypid   = tupdesc->attrs[i]->atttypid;
+		header->pd_columns[i].attnotnull = tupdesc->attrs[i]->attnotnull;
+
+		nnulls += (header->pd_columns[i].attnotnull) ? 0 : 1;
+	}
+
+	freespace = pageSize - hdrsize;
+
+	/* estimate in bits: 8 * itemsize per row plus 1 per nullable column */
+	maxtuples = 8 * freespace / (itemsize * 8 + nnulls);
+
+	/* the estimate ignores alignment: lay out, shrink until it all fits */
+	while (true)
+	{
+		Size	offset = hdrsize;
+
+		/* a page without room for a single row is unusable for inserts */
+		if (maxtuples < 1)
+			elog(ERROR, "column store row does not fit on a page");
+
+		for (i = 0; i < natts; i++)
+		{
+			offset = MAXALIGN(offset);
+			header->pd_columns[i].data_start = offset;
+
+			/* space for data */
+			offset += maxtuples * tupdesc->attrs[i]->attlen;
+
+			offset = MAXALIGN(offset);
+			header->pd_columns[i].null_start = offset;
+
+			/* NULL bitmap size */
+			offset += (maxtuples + 7) / 8;
+		}
+
+		/* and finally one item pointer for each row */
+		offset = MAXALIGN(offset);
+		header->pd_tupleids = offset;
+		offset += maxtuples * sizeof(ItemPointerData);
+
+		/* if we fit onto a page, terminate, otherwise decrement maxtuples */
+		if (offset <= pageSize)
+			break;
+
+		maxtuples--;
+	}
+
+	header->pd_maxitems = maxtuples;
+}
+
+/*
+ * get_colstore_buffer
+ *		Return a pinned, exclusively locked buffer of 'colstore' to insert into.
+ *		The last block is used if it still has a free item slot; otherwise the
+ *		relation is extended by one block, which is initialized as an empty
+ *		columnar page and marked dirty.  The caller unlocks and releases it.
+ */
+static Buffer
+get_colstore_buffer(Relation rel, Relation colstore)
+{
+	Buffer			buffer;
+	ColumnarPage	page;
+	BlockNumber		targetBlock = InvalidBlockNumber;
+	bool			needLock = !RELATION_IS_LOCAL(rel);	/* check the parent */
+	BlockNumber		nblocks = RelationGetNumberOfBlocks(colstore);
+
+	/* we'll always try the last block first, and then possibly extend */
+	if (nblocks > 0)
+		targetBlock = nblocks - 1;
+
+	/* get the last block (if the relation is empty, just do the extension) */
+	if (targetBlock != InvalidBlockNumber)
+	{
+		buffer = ReadBuffer(colstore, targetBlock);
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+		page = BufferGetColumnarPage(buffer);
+
+		/* if there's enough space for another item, we're done */
+		if (ColumnarPageGetFreeItems(page) > 0)
+			return buffer;
+
+		/* otherwise, let's allocate a new page at the end */
+		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+		ReleaseBuffer(buffer);
+	}
+
+	if (needLock)
+		LockRelationForExtension(colstore, ExclusiveLock);
+
+	buffer = ReadBuffer(colstore, P_NEW);
+
+	/* Now acquire lock on the new page. */
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+	if (needLock)
+		UnlockRelationForExtension(colstore, ExclusiveLock);
+
+	page = BufferGetColumnarPage(buffer);
+	ColumnarPageInit(page, BufferGetPageSize(buffer), colstore);
+	RelationSetTargetBlock(colstore, BufferGetBlockNumber(buffer));
+	MarkBufferDirty(buffer);
+
+	return buffer;
+}
+
+/* Number of item slots still unused on 'page' (pd_maxitems - pd_nitems). */
+static int
+ColumnarPageGetFreeItems(ColumnarPage page)
+{
+	ColumnarPageHeader hdr = (ColumnarPageHeader) page;
+	return (int) (hdr->pd_maxitems - hdr->pd_nitems);
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index c5befcb..72c98ad 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3751,6 +3751,10 @@ DESCR("BERNOULLI tablesample method handler");
 DATA(insert OID = 3314 (  system			PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_handler _null_ _null_ _null_ ));
 DESCR("SYSTEM tablesample method handler");
 
+/* column stores */
+DATA(insert OID = 3354 (  cstore_dummy_handler	PGNSP PGUID 12 1 0 0 0 f f f f t f i 0 0 3351 "" _null_ _null_ _null_ _null_ _null_ cstore_dummy_handler _null_ _null_ _null_ ));
+DESCR("dummy column store method");
+
 /* cryptographic */
 DATA(insert OID =  2311 (  md5	   PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ md5_text _null_ _null_ _null_ ));
 DESCR("MD5 hash");
diff --git a/src/include/colstore/colstore_dummy.h b/src/include/colstore/colstore_dummy.h
new file mode 100644
index 0000000..372eeff
--- /dev/null
+++ b/src/include/colstore/colstore_dummy.h
@@ -0,0 +1,161 @@
+/*-------------------------------------------------------------------------
+ *
+ * colstore_dummy.h
+ *	  Page layout and accessor macros for the 'dummy' column store
+ *
+ * Copyright (c) 2010-2015, PostgreSQL Global Development Group
+ *
+ * src/include/colstore/colstore_dummy.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef COLSTOREDUMMY_H
+#define COLSTOREDUMMY_H
+
+#include "access/attnum.h"
+#include "access/xlogdefs.h"
+#include "storage/block.h"
+#include "storage/bufpage.h"
+#include "storage/item.h"
+#include "storage/itemptr.h"
+#include "storage/off.h"
+#include "utils/rel.h"
+
+/*
+ * A columnar disk page is an abstraction layered on top of a postgres
+ * disk block (which is simply a unit of i/o, see block.h).
+ *
+ * specifically, while a disk block can be unformatted, a columnar disk
+ * page format depends on the particular column store implementation.
+ * For the 'dummy' implementation, it is a slotted page of the form:
+ *
+ * +----------------+-------+-----------------+-----------------+
+ * | ColumnarPageHeaderData | ColumnInfoData1 | ColumnInfoData2 |
+ * +-------+----------------+-+---------------+-----------------+
+ * |  ...   ColumnInfoDataN   |           tuple IDs             |
+ * +---------------------+----+----------------+----------------+
+ * |    column 1 data    |    column 2 data    |      ....      |
+ * +---------------------+---------------------+----------------+
+ * |                                                            |
+ * |                                                            |
+ * +---------------------------------+--------------------------+
+ * |        ...                      |       column N data      |
+ * +---------------------------------+--------------------------+
+ *
+ * a page is full when a new tuple can't be added (even after moving
+ * the data around, compressing etc.)
+ *
+ * all blocks written out by an access method must be disk pages.
+ *
+ * EXCEPTIONS:
+ *
+ * obviously, a page is not formatted before it is initialized by
+ * a call to ColumnarPageInit.
+ *
+ * NOTES:
+ *
+ * The tuple IDs contain tuple IDs for all tuples stored on this page,
+ * providing a mapping to the heap part. It's good to keep this array
+ * sorted, as that makes lookup faster. It's also possible to encode
+ * this array using RLE, for example (again, that works better for
+ * sorted data). There's also a min/max TID in the page header.
+ *
+ * The 'column data' combine all the data for a column, i.e. the actual
+ * values and NULL bitmap. The data may be partially compressed, etc.
+ *
+ * Some of the page fields may seem too big (e.g. 32 bits for nitems seems
+ * a bit over the top), but (a) 16 bits is just on the border for 64kB pages
+ * (and larger pages may get supported in the future), and (b) we do expect
+ * efficient storage of some data types (e.g. bool type in 1 bit). That makes
+ * the 16bit data type inadequate.
+ *
+ * We must however keep the beginning of the header exactly the same as for
+ * regular pages, so that the checksum / validation stuff works.
+ */
+
+typedef Pointer ColumnarPage;
+
+typedef struct ColumnInfoData	/* one entry of the page's pd_columns[] array */
+{
+	AttrNumber		attnum;		/* attribute number from the tuple descriptor */
+	int				attlen;		/* attribute length; ColumnarPageInit rejects < 0 */
+	Oid				atttypid;	/* attribute's atttypid from the tuple descriptor */
+	bool			attnotnull;	/* attribute's attnotnull flag */
+	LocationIndex	data_start;	/* page offset of the column's data area */
+	LocationIndex	data_bytes;	/* bytes of the data area used so far */
+	LocationIndex	null_start;	/* page offset of the column's NULL bitmap */
+	LocationIndex	null_bytes;	/* bytes of the NULL bitmap used so far */
+} ColumnInfoData;
+
+typedef struct ColumnarPageHeaderData	/* header at the start of each page */
+{
+	/* XXX LSN is member of *any* block, not only page-organized ones */
+	PageXLogRecPtr	pd_lsn;		/* LSN: next byte after last byte of xlog
+								 * record for last change to this page */
+	uint16		pd_checksum;	/* checksum */
+	uint16		pd_flags;		/* flag bits; ColumnarPageInit sets this to 0 */
+	LocationIndex pd_lower;		/* offset to start of free space */
+	LocationIndex pd_upper;		/* offset to end of free space */
+	LocationIndex pd_special;	/* offset to start of special space */
+	uint16		pd_pagesize_version;	/* page size and layout version */
+
+	/* fields above match the regular page header; our fields start here */
+	LocationIndex pd_tupleids;	/* offset of tuple IDs */
+	uint16		pd_ncolumns;	/* number of columns on the page */
+	uint32		pd_nitems;		/* number of items on the page */
+	uint32		pd_maxitems;	/* max number of items on the page */
+	ItemPointerData	pd_min_tid;	/* minimum TID placed on page (FIXME unset) */
+	ItemPointerData pd_max_tid;	/* maximum TID placed on page (FIXME unset) */
+	ColumnInfoData	pd_columns[FLEXIBLE_ARRAY_MEMBER]; /* column info array */
+} ColumnarPageHeaderData;
+
+typedef ColumnarPageHeaderData *ColumnarPageHeader;
+
+/* view the block held in a buffer as a columnar page */
+#define BufferGetColumnarPage(buffer) ((ColumnarPage) BufferGetBlock(buffer))
+
+/* number of rows currently stored on the page */
+#define PageGetNumOfItems(page) \
+	(((ColumnarPageHeader) (page))->pd_nitems)
+
+/* per-column info entry (an lvalue) and shortcuts to its fields */
+#define PageGetColumnInfo(page, column) \
+	(((ColumnarPageHeader) (page))->pd_columns[(column)])
+#define PageGetColumnAttlen(page, column) \
+	(PageGetColumnInfo(page, column).attlen)
+#define PageGetColumnDataOffset(page, column) \
+	(PageGetColumnInfo(page, column).data_start)
+#define PageGetColumnDataBytes(page, column) \
+	(PageGetColumnInfo(page, column).data_bytes)
+#define PageGetColumnNullsOffset(page, column) \
+	(PageGetColumnInfo(page, column).null_start)
+
+/* bookkeeping updates: grow the used data bytes, set the used bitmap bytes */
+#define PageGetColumnDataAddBytes(page, column, len) \
+	(PageGetColumnInfo(page, column).data_bytes += (len))
+#define PageGetColumnNullsSetBytes(page, column, len) \
+	(PageGetColumnInfo(page, column).null_bytes = (len))
+
+/* offset just past the last stored value of the column */
+#define PageGetColumnDataOffsetNext(page, column) \
+	(PageGetColumnDataOffset(page, column) + \
+	 PageGetColumnDataBytes(page, column))
+
+/* char pointers to a column's data start, next free data byte, NULL bitmap */
+#define PageGetColumnData(page, column) \
+	((char *) (page) + PageGetColumnDataOffset(page, column))
+#define PageGetColumnDataNext(page, column) \
+	((char *) (page) + PageGetColumnDataOffsetNext(page, column))
+#define PageGetColumnNulls(page, column) \
+	((char *) (page) + PageGetColumnNullsOffset(page, column))
+
+/* tuple ID array: its start (char *) and the slot after the last stored TID */
+#define PageGetTupleIds(page) \
+	((char *) (page) + (((ColumnarPageHeader) (page))->pd_tupleids))
+#define PageGetNextTupleId(page) \
+	(((ItemPointer) PageGetTupleIds(page)) + PageGetNumOfItems(page))
+
+
+extern void ColumnarPageInit(ColumnarPage page, Size pageSize, Relation rel);
+
+#endif   /* COLSTOREDUMMY_H */
-- 
2.1.4

