Index: src/backend/access/heap/Makefile
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/heap/Makefile,v
retrieving revision 1.15
diff -c -r1.15 Makefile
*** src/backend/access/heap/Makefile	8 Apr 2007 01:26:27 -0000	1.15
--- src/backend/access/heap/Makefile	31 May 2007 10:21:28 -0000
***************
*** 12,18 ****
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = heapam.o hio.o rewriteheap.o tuptoaster.o
  
  all: SUBSYS.o
  
--- 12,18 ----
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = heapam.o hio.o rewriteheap.o tuptoaster.o syncscan.o
  
  all: SUBSYS.o
  
Index: src/backend/access/heap/heapam.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/heap/heapam.c,v
retrieving revision 1.234
diff -c -r1.234 heapam.c
*** src/backend/access/heap/heapam.c	30 May 2007 20:11:53 -0000	1.234
--- src/backend/access/heap/heapam.c	4 Jun 2007 08:41:56 -0000
***************
*** 85,104 ****
  
  	/*
  	 * If the table is large relative to NBuffers, use a bulk-read access
! 	 * strategy, else use the default random-access strategy.  During a
! 	 * rescan, don't make a new strategy object if we don't have to.
  	 */
  	if (scan->rs_nblocks > NBuffers / 4 &&
  		!scan->rs_rd->rd_istemp)
  	{
  		if (scan->rs_strategy == NULL)
  			scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
  	}
  	else
  	{
  		if (scan->rs_strategy != NULL)
  			FreeAccessStrategy(scan->rs_strategy);
  		scan->rs_strategy = NULL;
  	}
  
  	scan->rs_inited = false;
--- 85,108 ----
  
  	/*
  	 * If the table is large relative to NBuffers, use a bulk-read access
! 	 * strategy and enable synchronized scanning (see syncscan.c).  During
! 	 * a rescan, don't make a new strategy object if we don't have to.
  	 */
  	if (scan->rs_nblocks > NBuffers / 4 &&
  		!scan->rs_rd->rd_istemp)
  	{
  		if (scan->rs_strategy == NULL)
  			scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
+ 
+ 		scan->rs_startpage = ss_get_location(scan);
  	}
  	else
  	{
  		if (scan->rs_strategy != NULL)
  			FreeAccessStrategy(scan->rs_strategy);
  		scan->rs_strategy = NULL;
+ 
+ 		scan->rs_startpage = 0;
  	}
  
  	scan->rs_inited = false;
***************
*** 229,234 ****
--- 233,239 ----
  	Snapshot	snapshot = scan->rs_snapshot;
  	bool		backward = ScanDirectionIsBackward(dir);
  	BlockNumber page;
+ 	BlockNumber prevpage;
  	Page		dp;
  	int			lines;
  	OffsetNumber lineoff;
***************
*** 251,257 ****
  				tuple->t_data = NULL;
  				return;
  			}
! 			page = 0;			/* first page */
  			heapgetpage(scan, page);
  			lineoff = FirstOffsetNumber;		/* first offnum */
  			scan->rs_inited = true;
--- 256,262 ----
  				tuple->t_data = NULL;
  				return;
  			}
! 			page = scan->rs_startpage;	/* first page of the scan */
  			heapgetpage(scan, page);
  			lineoff = FirstOffsetNumber;		/* first offnum */
  			scan->rs_inited = true;
***************
*** 276,281 ****
--- 281,290 ----
  	{
  		if (!scan->rs_inited)
  		{
+ 			/*
+ 			 * Note: not normally reached; the executor optimizes this case away.
+ 			 */
+ 
  			/*
  			 * return null immediately if relation is empty
  			 */
***************
*** 285,291 ****
  				tuple->t_data = NULL;
  				return;
  			}
! 			page = scan->rs_nblocks - 1;		/* final page */
  			heapgetpage(scan, page);
  		}
  		else
--- 294,305 ----
  				tuple->t_data = NULL;
  				return;
  			}
! 			/* start from the last page of the scan */
! 			if (scan->rs_startpage == 0)
! 				page = scan->rs_nblocks - 1;
! 			else
! 				page = scan->rs_startpage - 1;
! 
  			heapgetpage(scan, page);
  		}
  		else
***************
*** 398,406 ****
  		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
  
  		/*
! 		 * return NULL if we've exhausted all the pages
  		 */
! 		if (backward ? (page == 0) : (page + 1 >= scan->rs_nblocks))
  		{
  			if (BufferIsValid(scan->rs_cbuf))
  				ReleaseBuffer(scan->rs_cbuf);
--- 412,439 ----
  		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
  
  		/*
! 		 * Handle wrap-around correctly; we might have started from somewhere
! 		 * in the middle of the relation.
! 		 */
! 		prevpage = page;
! 		if (backward)
! 		{
! 			if (page == 0)
! 				page = scan->rs_nblocks;
! 			page--;
! 		}
! 		else
! 		{
! 			page++;
! 			if (page == scan->rs_nblocks)
! 				page = 0;
! 		}
! 
! 		/*
! 		 * return NULL if we've exhausted all the pages.
  		 */
! 		if ((ScanDirectionIsForward(dir) && page == scan->rs_startpage) ||
! 		    (ScanDirectionIsBackward(dir) && prevpage == scan->rs_startpage))
  		{
  			if (BufferIsValid(scan->rs_cbuf))
  				ReleaseBuffer(scan->rs_cbuf);
***************
*** 411,420 ****
  			return;
  		}
  
- 		page = backward ? (page - 1) : (page + 1);
- 
  		heapgetpage(scan, page);
  
  		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
  
  		dp = (Page) BufferGetPage(scan->rs_cbuf);
--- 444,455 ----
  			return;
  		}
  
  		heapgetpage(scan, page);
  
+ 		/* Report our new scan position for synchronization purposes */
+ 		if (scan->rs_strategy != NULL)
+ 			ss_report_location(scan, page);
+ 
  		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
  
  		dp = (Page) BufferGetPage(scan->rs_cbuf);
***************
*** 455,460 ****
--- 490,496 ----
  	HeapTuple	tuple = &(scan->rs_ctup);
  	bool		backward = ScanDirectionIsBackward(dir);
  	BlockNumber page;
+ 	BlockNumber prevpage;
  	Page		dp;
  	int			lines;
  	int			lineindex;
***************
*** 478,484 ****
  				tuple->t_data = NULL;
  				return;
  			}
! 			page = 0;			/* first page */
  			heapgetpage(scan, page);
  			lineindex = 0;
  			scan->rs_inited = true;
--- 514,520 ----
  				tuple->t_data = NULL;
  				return;
  			}
! 			page = scan->rs_startpage;	/* first page of the scan */
  			heapgetpage(scan, page);
  			lineindex = 0;
  			scan->rs_inited = true;
***************
*** 500,505 ****
--- 536,545 ----
  	{
  		if (!scan->rs_inited)
  		{
+ 			/*
+ 			 * Note: not normally reached; the executor optimizes this case away.
+ 			 */
+ 
  			/*
  			 * return null immediately if relation is empty
  			 */
***************
*** 509,515 ****
  				tuple->t_data = NULL;
  				return;
  			}
! 			page = scan->rs_nblocks - 1;		/* final page */
  			heapgetpage(scan, page);
  		}
  		else
--- 549,560 ----
  				tuple->t_data = NULL;
  				return;
  			}
! 			/* start from the last page of the scan */
! 			if (scan->rs_startpage == 0)
! 				page = scan->rs_nblocks - 1;
! 			else
! 				page = scan->rs_startpage - 1;
! 
  			heapgetpage(scan, page);
  		}
  		else
***************
*** 613,626 ****
  		}
  
  		/*
! 		 * if we get here, it means we've exhausted the items on this page and
! 		 * it's time to move to the next.
  		 */
  
  		/*
! 		 * return NULL if we've exhausted all the pages
  		 */
! 		if (backward ? (page == 0) : (page + 1 >= scan->rs_nblocks))
  		{
  			if (BufferIsValid(scan->rs_cbuf))
  				ReleaseBuffer(scan->rs_cbuf);
--- 658,686 ----
  		}
  
  		/*
! 		 * If we get here, it means we've exhausted the items on this page and
! 		 * it's time to move to the next.  Handle wrap-around correctly; we
! 		 * might have started from somewhere in the middle of the relation.
  		 */
+ 		prevpage = page;
+ 		if (backward)
+ 		{
+ 			if (page == 0)
+ 				page = scan->rs_nblocks;
+ 			page--;
+ 		}
+ 		else
+ 		{
+ 			page++;
+ 			if (page == scan->rs_nblocks)
+ 				page = 0;
+ 		}
  
  		/*
! 		 * return NULL if we've exhausted all the pages.
  		 */
! 		if ((ScanDirectionIsForward(dir) && page == scan->rs_startpage) ||
! 		    (ScanDirectionIsBackward(dir) && prevpage == scan->rs_startpage))
  		{
  			if (BufferIsValid(scan->rs_cbuf))
  				ReleaseBuffer(scan->rs_cbuf);
***************
*** 631,639 ****
  			return;
  		}
  
- 		page = backward ? (page - 1) : (page + 1);
  		heapgetpage(scan, page);
  
  		dp = (Page) BufferGetPage(scan->rs_cbuf);
  		lines = scan->rs_ntuples;
  		linesleft = lines;
--- 691,701 ----
  			return;
  		}
  
  		heapgetpage(scan, page);
  
+ 		if (scan->rs_strategy != NULL)
+ 			ss_report_location(scan, page);
+ 
  		dp = (Page) BufferGetPage(scan->rs_cbuf);
  		lines = scan->rs_ntuples;
  		linesleft = lines;
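
A standalone sketch of the circular page-advance logic above may help review
(not part of the patch; "nblocks" and "startpage" stand in for
scan->rs_nblocks and scan->rs_startpage). Starting anywhere in the relation,
the scan wraps around at the end and terminates when it gets back to its
start page, so every page is read exactly once:

#include <stdio.h>

int
main(void)
{
	unsigned int nblocks = 5;	/* pretend the relation has 5 pages */
	unsigned int startpage = 3; /* as if reported by a scan in progress */
	unsigned int page = startpage;

	for (;;)
	{
		printf("reading page %u\n", page);	/* heapgetpage() in the real code */

		/* advance with wrap-around, as in the forward-scan case above */
		page++;
		if (page == nblocks)
			page = 0;

		/* back at the start page: the whole relation has been visited */
		if (page == startpage)
			break;
	}
	return 0;
}

This prints pages 3, 4, 0, 1, 2. The backward case is the mirror image,
except that the start page is processed last rather than first, which is
why the termination test compares prevpage, not page, against rs_startpage
when scanning backwards.
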
Index: src/backend/access/heap/syncscan.c
===================================================================
RCS file: src/backend/access/heap/syncscan.c
diff -N src/backend/access/heap/syncscan.c
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- src/backend/access/heap/syncscan.c	4 Jun 2007 08:57:16 -0000
***************
*** 0 ****
--- 1,287 ----
+ /*-------------------------------------------------------------------------
+  *
+  * syncscan.c
+  *	  heap scan synchronization support
+  *
+  * When multiple backends run a sequential scan on the same table, we try
+  * to keep them synchronized to reduce the overall I/O needed. The goal is
+  * to read each page into the shared buffer cache only once, and to let all
+  * backends that take part in the scan process the page before it falls out
+  * of the cache.
+  *
+  * To accomplish that, we keep track of the scan position of each table, and
+  * start new scans close to where the previous scan(s) are. Assuming that I/O
+  * is slow compared to the processing of each page, that's enough to keep the
+  * scans synchronized. We don't try to do any further synchronization to keep
+  * the scans together; some scans might progress much more slowly than others
+  * if, for example, the results need to be transferred to the client over a
+  * slow network, and we don't want such queries to slow down others.
+  *
+  * There can realistically only be a few sequential scans on different tables
+  * in progress at any time. Therefore we just keep the scan positions in a
+  * small LRU list which we scan every time we need to look up or update a
+  * scan position. If that ever becomes a scalability issue, we could switch
+  * to a hash table.
+  *
+  * INTERFACE ROUTINES
+  *		ss_get_location		- return current scan location of a relation
+  *		ss_report_location	- update current scan location
+  *
+  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  $PostgreSQL$
+  *
+  *-------------------------------------------------------------------------
+  */
+ #include "postgres.h"
+ 
+ #include "access/heapam.h"
+ #include "miscadmin.h"
+ 
+ /*
+  * Size of the LRU list.
+  *
+  * Note: the code assumes that SYNC_SCAN_NELEM > 1.
+  *
+  * XXX: What's a good value? It should be large enough to hold the
+  * maximum number of large tables scanned simultaneously. But a larger
+  * value means more traversing of the LRU list when starting a new scan.
+  */
+ #define SYNC_SCAN_NELEM 20
+ 
+ /*
+  * Interval between reports of the location of the current scan, in pages.
+  *
+  * Note: this should be smaller than the ring size (see freelist.c) we use
+  * for bulk reads. Otherwise a scan joining other scans might start from a
+  * page that's no longer in the buffer cache, and would need to read it in
+  * again. This is a bit fuzzy; there's no guarantee that the new scan will
+  * read the page before it leaves the buffer cache anyway, and on the other
+  * hand the page is most likely still in the OS cache.
+  */
+ #define SYNC_SCAN_REPORT_INTERVAL (128 * 1024 / BLCKSZ)
+ 
+ 
+ /* GUC variables */
+ bool Trace_sync_seqscan = false;
+ 
+ /*
+  * The scan locations structure is essentially a doubly-linked LRU list with
+  * head and tail pointers, but designed to hold a fixed maximum number of
+  * elements in fixed-size shared memory.
+  */
+ typedef struct ss_scan_location_t {
+ 	RelFileNode relfilenode;	/* The relfilenode that tags this entry */
+ 	BlockNumber location;		/* The location in the relation */
+ } ss_scan_location_t;
+ 
+ typedef struct ss_lru_item_t {
+ 	struct ss_lru_item_t	*prev;
+ 	struct ss_lru_item_t	*next;
+ 	ss_scan_location_t		location;
+ } ss_lru_item_t;
+ 
+ typedef struct ss_scan_locations_t {
+ 	ss_lru_item_t		*head;
+ 	ss_lru_item_t		*tail;
+ 	ss_lru_item_t		items[1]; /* SYNC_SCAN_NELEM items */
+ } ss_scan_locations_t;
+ 
+ #define SizeOfScanLocations offsetof(ss_scan_locations_t, items[SYNC_SCAN_NELEM])
+ 
+ /* Pointer to struct in shared memory */
+ static ss_scan_locations_t *scan_locations;
+ 
+ /* prototypes for internal functions */
+ static BlockNumber ss_search(RelFileNode, BlockNumber, bool);
+ 
+ 
+ /*
+  * SyncScanShmemSize --- report amount of shared memory space needed
+  */
+ Size
+ SyncScanShmemSize(void)
+ {
+ 	return SizeOfScanLocations;
+ }
+ 
+ /*
+  * SyncScanShmemInit --- initialize this module's shared memory
+  */
+ void
+ SyncScanShmemInit(void)
+ {
+ 	int i;
+ 	bool found;
+ 	
+ 	scan_locations = (ss_scan_locations_t *)
+ 		ShmemInitStruct("Sync Scan Locations List",
+ 						SizeOfScanLocations, &found);
+ 
+ 	if (!IsUnderPostmaster)
+ 	{
+ 		/* Initialize shared memory area */
+ 		Assert(!found);
+ 
+ 		scan_locations->head = &scan_locations->items[0];
+ 		scan_locations->tail = &scan_locations->items[SYNC_SCAN_NELEM - 1];
+ 
+ 		for (i = 0; i < SYNC_SCAN_NELEM; i++)
+ 		{
+ 			ss_lru_item_t *item = &scan_locations->items[i];
+ 
+ 			/*
+ 			 * Initialize all slots with invalid values. As scans start, these
+ 			 * entries fall off the LRU list and are replaced with real ones.
+ 			 */
+ 			item->location.relfilenode.spcNode = InvalidOid;
+ 			item->location.relfilenode.dbNode = InvalidOid;
+ 			item->location.relfilenode.relNode = InvalidOid;
+ 			item->location.location = InvalidBlockNumber;
+ 
+ 			item->prev = (i > 0) ?
+ 				(&scan_locations->items[i - 1]) : NULL;
+ 			item->next = (i < SYNC_SCAN_NELEM - 1) ?
+ 				(&scan_locations->items[i + 1]) : NULL;
+ 		}
+ 	}
+ 	else
+ 		Assert(found);
+ }
+ 
+ /*
+  * ss_search --- searches the scan_locations structure for an entry with the
+  *		given relfilenode.
+  *
+  * If "set" is true, the location is updated to the given location.  If no
+  * entry for the given relfilenode is found, it will be created at the head
+  * of the list with the given location, even if set == false.
+  *
+  * In any case, the location after possible update is returned.
+  */
+ static BlockNumber
+ ss_search(RelFileNode relfilenode, BlockNumber location, bool set)
+ {
+ 	ss_lru_item_t	*item;
+ 
+ 	item = scan_locations->head;
+ 
+ 	for (;;)
+ 	{
+ 		bool match;
+ 
+ 		match = RelFileNodeEquals(item->location.relfilenode, relfilenode);
+ 
+ 		if (match || item->next == NULL)
+ 		{
+ 			/*
+ 			 * If we reached the end of the list and no match was found,
+ 			 * take over the last (least recently used) entry.
+ 			 */
+ 			if (!match)
+ 			{
+ 				item->location.relfilenode = relfilenode;
+ 				item->location.location = location;
+ 			}
+ 			else if (set)
+ 				item->location.location = location;
+ 
+ 			/* Move the entry to the front of the LRU list */
+ 			if (item != scan_locations->head)
+ 			{
+ 				/* unlink */
+ 				if (item == scan_locations->tail)
+ 					scan_locations->tail = item->prev;
+ 				item->prev->next = item->next;
+ 				if (item->next)
+ 					item->next->prev = item->prev;
+ 
+ 				/* link */
+ 				item->prev = NULL;
+ 				item->next = scan_locations->head;
+ 				scan_locations->head->prev = item;
+ 				scan_locations->head = item;
+ 			}
+ 
+ 			return item->location.location;
+ 		}
+ 		item = item->next;
+ 	}
+ 	/* never reached */
+ }
+ 
+ /*
+  * ss_get_location --- gets the optimal starting location for a scan
+  *
+  * Returns the location of an already running sequential scan on the
+  * relation, or 0 if no valid location is found.
+  *
+  * This function assumes that scan->rs_nblocks is already properly set.
+  */
+ BlockNumber
+ ss_get_location(HeapScanDesc scan)
+ {
+ 	BlockNumber startloc;
+ 
+ 	LWLockAcquire(SyncScanLock, LW_EXCLUSIVE);
+ 	startloc = ss_search(scan->rs_rd->rd_node, 0, false);
+ 	LWLockRelease(SyncScanLock);
+ 
+ 	/*
+ 	 * If the location is not a valid block number for this scan, start at 0.
+ 	 *
+ 	 * This can happen if, for instance, a VACUUM truncated the table
+ 	 * since the location was set.
+ 	 */
+ 	if (startloc >= scan->rs_nblocks)
+ 	{
+ 		if (Trace_sync_seqscan)
+ 			elog(DEBUG1, "SYNC_SCAN: location %u out of range; relation has %u pages",
+ 				startloc, scan->rs_nblocks);
+ 		return 0;
+ 	}
+ 
+ 	if (Trace_sync_seqscan)
+ 		elog(DEBUG1, "SYNC_SCAN: START: OID = %u; Location = %u; Size: %u",
+ 			 RelationGetRelid(scan->rs_rd), startloc, scan->rs_nblocks);
+ 
+ 	return startloc;
+ }
+ 
+ /*
+  * ss_report_location --- updates the current scan location
+  *
+  * Writes an entry in the Sync Scan structure of the form
+  * (relfilenode, blocknumber), overwriting any existing entry for the same
+  * relfilenode.
+  */
+ void
+ ss_report_location(HeapScanDesc scan, BlockNumber location)
+ {
+ 	if (Trace_sync_seqscan)
+ 	{
+ 		if ((location % 1000) == 0)
+ 			elog(DEBUG2, "SYNC_SCAN: page %u", location);
+ 	}
+ 
+ 	/*
+ 	 * To reduce lock contention, only report scan progress once every
+ 	 * SYNC_SCAN_REPORT_INTERVAL pages. For the same reason, don't block if
+ 	 * the lock isn't immediately available. Missing a few updates isn't
+ 	 * critical; it just means that a new scan that wants to join the pack
+ 	 * will start a little bit behind the head of the scan. Hopefully the
+ 	 * pages are still in the OS cache and the scan catches up quickly.
+ 	 */
+ 	if ((location % SYNC_SCAN_REPORT_INTERVAL) == 0)
+ 	{
+ 		if (LWLockConditionalAcquire(SyncScanLock, LW_EXCLUSIVE))
+ 		{
+ 			ss_search(scan->rs_rd->rd_node, location, true);
+ 			LWLockRelease(SyncScanLock);
+ 		}
+ 		else if (Trace_sync_seqscan)
+ 			elog(DEBUG1, "SYNC_SCAN: missed update %u", location);
+ 	}
+ }
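
The move-to-front LRU in ss_search() can likewise be exercised standalone.
Here is a minimal user-space model (not part of the patch; integer keys
stand in for relfilenodes, and the SyncScanLock serialization is omitted).
A lookup miss takes over the tail entry, i.e. the least recently used one,
and the touched entry always moves to the head, so the table that has gone
longest without a position report is the one that gets evicted:

#include <stdio.h>

#define NELEM 4					/* the real list uses SYNC_SCAN_NELEM = 20 */

typedef struct item_t
{
	struct item_t *prev;
	struct item_t *next;
	int			key;			/* -1 = invalid, like InvalidOid */
	int			value;
} item_t;

static item_t items[NELEM];
static item_t *head = &items[0];
static item_t *tail = &items[NELEM - 1];

static void
init(void)
{
	int			i;

	for (i = 0; i < NELEM; i++)
	{
		items[i].key = -1;
		items[i].value = 0;
		items[i].prev = (i > 0) ? &items[i - 1] : NULL;
		items[i].next = (i < NELEM - 1) ? &items[i + 1] : NULL;
	}
}

/* Same contract as ss_search(): find or create the entry, optionally set it */
static int
search(int key, int value, int set)
{
	item_t	   *item;

	for (item = head;; item = item->next)
	{
		int			match = (item->key == key);

		if (!match && item->next != NULL)
			continue;

		if (!match)				/* miss: take over the LRU (tail) entry */
			item->key = key;
		if (!match || set)
			item->value = value;

		if (item != head)		/* move the touched entry to the front */
		{
			if (item == tail)
				tail = item->prev;
			item->prev->next = item->next;
			if (item->next)
				item->next->prev = item->prev;
			item->prev = NULL;
			item->next = head;
			head->prev = item;
			head = item;
		}
		return item->value;
	}
}

int
main(void)
{
	init();
	search(42, 100, 1);			/* a scan on table 42 reports block 100 */
	search(7, 5000, 1);			/* a scan on table 7 reports block 5000 */
	printf("table 42 resumes at block %d\n", search(42, 0, 0));
	return 0;
}

This prints "table 42 resumes at block 100": the report for table 42
survives the later report for table 7, because the list tracks several
tables at once and only evicts the least recently used one.
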
Index: src/backend/storage/ipc/ipci.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/storage/ipc/ipci.c,v
retrieving revision 1.91
diff -c -r1.91 ipci.c
*** src/backend/storage/ipc/ipci.c	15 Feb 2007 23:23:23 -0000	1.91
--- src/backend/storage/ipc/ipci.c	31 May 2007 10:21:28 -0000
***************
*** 19,24 ****
--- 19,25 ----
  #include "access/nbtree.h"
  #include "access/subtrans.h"
  #include "access/twophase.h"
+ #include "access/heapam.h"
  #include "miscadmin.h"
  #include "pgstat.h"
  #include "postmaster/autovacuum.h"
***************
*** 112,117 ****
--- 113,119 ----
  		size = add_size(size, BgWriterShmemSize());
  		size = add_size(size, AutoVacuumShmemSize());
  		size = add_size(size, BTreeShmemSize());
+ 		size = add_size(size, SyncScanShmemSize());
  #ifdef EXEC_BACKEND
  		size = add_size(size, ShmemBackendArraySize());
  #endif
***************
*** 216,221 ****
--- 218,224 ----
  	 * Set up other modules that need some shared memory space
  	 */
  	BTreeShmemInit();
+ 	SyncScanShmemInit();
  
  #ifdef EXEC_BACKEND
  
Index: src/backend/utils/misc/guc.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/utils/misc/guc.c,v
retrieving revision 1.391
diff -c -r1.391 guc.c
*** src/backend/utils/misc/guc.c	8 May 2007 16:33:51 -0000	1.391
--- src/backend/utils/misc/guc.c	2 Jun 2007 07:50:23 -0000
***************
*** 26,31 ****
--- 26,32 ----
  #endif
  
  
+ #include "access/heapam.h"
  #include "access/gin.h"
  #include "access/transam.h"
  #include "access/twophase.h"
***************
*** 783,788 ****
--- 784,799 ----
  		false, NULL, NULL
  	},
  
+ 	{
+ 		{"trace_sync_seqscan", PGC_USERSET, DEVELOPER_OPTIONS,
+ 			gettext_noop("Generates debugging output for Synchronized Scans."),
+ 			NULL,
+ 			GUC_NOT_IN_SAMPLE
+ 		},
+ 		&Trace_sync_seqscan,
+ 		false, NULL, NULL
+ 	},
+ 
  #ifdef LOCK_DEBUG
  	{
  		{"trace_locks", PGC_SUSET, DEVELOPER_OPTIONS,
Index: src/include/access/heapam.h
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/access/heapam.h,v
retrieving revision 1.124
diff -c -r1.124 heapam.h
*** src/include/access/heapam.h	27 May 2007 03:50:39 -0000	1.124
--- src/include/access/heapam.h	2 Jun 2007 07:50:30 -0000
***************
*** 25,30 ****
--- 25,33 ----
  #include "utils/rel.h"
  #include "utils/tqual.h"
  
+ /* GUC variable, in syncscan.c */
+ extern bool Trace_sync_seqscan;
+ 
  /* ----------------
   *		fastgetattr
   *
***************
*** 240,243 ****
--- 243,252 ----
  
  extern void heap_sync(Relation relation);
  
+ /* in heap/syncscan.c */
+ extern void ss_report_location(HeapScanDesc scan, BlockNumber location);
+ extern BlockNumber ss_get_location(HeapScanDesc scan);
+ extern void SyncScanShmemInit(void);
+ extern Size SyncScanShmemSize(void);
+ 
  #endif   /* HEAPAM_H */
Index: src/include/access/relscan.h
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/access/relscan.h,v
retrieving revision 1.54
diff -c -r1.54 relscan.h
*** src/include/access/relscan.h	30 May 2007 20:12:02 -0000	1.54
--- src/include/access/relscan.h	2 Jun 2007 07:51:10 -0000
***************
*** 34,39 ****
--- 34,40 ----
  	bool		rs_inited;		/* false = scan not init'd yet */
  	HeapTupleData rs_ctup;		/* current tuple in scan, if any */
  	BlockNumber rs_cblock;		/* current block # in scan, if any */
+ 	BlockNumber rs_startpage;	/* page where this scan began */
  	Buffer		rs_cbuf;		/* current buffer in scan, if any */
  	/* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
  	ItemPointerData rs_mctid;	/* marked scan position, if any */
Index: src/include/storage/lwlock.h
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/storage/lwlock.h,v
retrieving revision 1.36
diff -c -r1.36 lwlock.h
*** src/include/storage/lwlock.h	16 Apr 2007 18:30:04 -0000	1.36
--- src/include/storage/lwlock.h	31 May 2007 10:21:28 -0000
***************
*** 62,67 ****
--- 62,68 ----
  	AddinShmemInitLock,
  	AutovacuumLock,
  	AutovacuumScheduleLock,
+ 	SyncScanLock,
  	/* Individual lock IDs end here */
  	FirstBufMappingLock,
  	FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
Index: src/test/regress/parallel_schedule
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/test/regress/parallel_schedule,v
retrieving revision 1.42
diff -c -r1.42 parallel_schedule
*** src/test/regress/parallel_schedule	2 Apr 2007 03:49:42 -0000	1.42
--- src/test/regress/parallel_schedule	4 Jun 2007 09:07:27 -0000
***************
*** 61,67 ****
  # ----------
  # The fourth group of parallel test
  # ----------
! test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update namespace prepared_xacts delete
  
  test: privileges
  test: misc
--- 61,67 ----
  # ----------
  # The fourth group of parallel test
  # ----------
! test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update namespace prepared_xacts delete syncscan
  
  test: privileges
  test: misc
Index: src/test/regress/expected/syncscan.out
===================================================================
RCS file: src/test/regress/expected/syncscan.out
diff -N src/test/regress/expected/syncscan.out
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- src/test/regress/expected/syncscan.out	4 Jun 2007 09:34:00 -0000
***************
*** 0 ****
--- 1,27 ----
+ --
+ --  Sequential scan synchronization
+ --
+ -- NOTE: This test depends on ss_test being large enough that synchronized
+ -- scans are used. Increase the number of rows inserted, or decrease
+ -- shared_buffers, if that's not happening.
+ CREATE TABLE ss_test (id VARCHAR(10), data VARCHAR(500));
+ INSERT INTO ss_test SELECT '1st half', repeat('a',500) FROM generate_series(1,10000);
+ INSERT INTO ss_test SELECT '2nd half', repeat('a',500) FROM generate_series(1,10000);
+ -- Scan a little over half of the table with a sequential scan. OFFSET 1000
+ -- because we don't track the scan location exactly, so the next scan will
+ -- start a little behind where this scan stops.
+ SELECT id FROM ss_test WHERE id = '2nd half' OFFSET 1000 LIMIT 1;
+     id    
+ ----------
+  2nd half
+ (1 row)
+ 
+ -- If synchronized scans are used, this should begin roughly where the above
+ -- scan stopped, and should return '2nd half'.
+ SELECT id FROM ss_test LIMIT 1;
+     id    
+ ----------
+  2nd half
+ (1 row)
+ 
+ DROP TABLE ss_test;
Index: src/test/regress/sql/syncscan.sql
===================================================================
RCS file: src/test/regress/sql/syncscan.sql
diff -N src/test/regress/sql/syncscan.sql
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- src/test/regress/sql/syncscan.sql	4 Jun 2007 09:37:56 -0000
***************
*** 0 ****
--- 1,22 ----
+ --
+ --  Sequential scan synchronization
+ --
+ -- NOTE: This test depends on ss_test being large enough that synchronized
+ -- scans are used. Increase the number of rows inserted, or decrease
+ -- shared_buffers, if that's not happening.
+ 
+ CREATE TABLE ss_test (id VARCHAR(10), data VARCHAR(500));
+ 
+ INSERT INTO ss_test SELECT '1st half', repeat('a',500) FROM generate_series(1,10000);
+ INSERT INTO ss_test SELECT '2nd half', repeat('a',500) FROM generate_series(1,10000);
+ 
+ -- Scan a little over half of the table with a sequential scan. OFFSET 1000
+ -- because we don't track the scan location exactly, so the next scan will
+ -- start a little behind where this scan stops.
+ SELECT id FROM ss_test WHERE id = '2nd half' OFFSET 1000 LIMIT 1;
+ 
+ -- If synchronized scans are used, this should begin roughly where the above
+ -- scan stopped, and should return '2nd half'.
+ SELECT id FROM ss_test LIMIT 1;
+ 
+ DROP TABLE ss_test;
