From c6ae575dd483b85cec6748c2e014d0f32565b4eb Mon Sep 17 00:00:00 2001
From: Amit Khandekar <amitdkhan.pg@gmail.com>
Date: Fri, 19 Mar 2021 20:22:44 +0800
Subject: [PATCH 1/2] Speed up xor'ing of two gist index signatures for
 tsvectors

In hemdistsign(), rather than using the xor operator on char values,
use it on 64-bit chunks. And since the chunks are 64-bit, use
pg_popcount64() on each of the chunks. I have checked that the two
bitvector pointer arguments of hemdistsign() are not always 64-bit
aligned. So process 64-bit chunks only if both pointers are 8-byte
aligned.

This results in a speed-up of GiST index creation for tsvectors. With
the default siglen (124), the speed-up is 12-20%. With siglen=700, it
is 30-50%. So with longer signature lengths, we get a higher
percentage speed-up.

Similar results are seen in other types using GiST indexes, such as
intarray, hstore, and ltree, which are available in contrib.

With smaller siglens such as 10, 20 etc., there is a slight reduction
in speed, of 1-7%, if we use this optimization. This is probably
because of the extra function call for pg_xorcount(), and might also
be due to the extra logic in pg_xorcount(). So for siglen of 32 bytes
or less, keep the existing method using byte-by-byte traversal.
---
 contrib/hstore/hstore_gist.c      | 17 ++--------------
 contrib/intarray/_intbig_gist.c   | 18 +---------------
 contrib/ltree/_ltree_gist.c       | 19 ++---------------
 src/backend/utils/adt/tsgistidx.c | 26 +++++------------------
 src/include/port/pg_bitutils.h    | 17 ++++++++++++++++
 src/port/pg_bitutils.c            | 34 +++++++++++++++++++++++++++++++
 6 files changed, 61 insertions(+), 70 deletions(-)

diff --git a/contrib/hstore/hstore_gist.c b/contrib/hstore/hstore_gist.c
index 102c9cea72..4970a0a2f0 100644
--- a/contrib/hstore/hstore_gist.c
+++ b/contrib/hstore/hstore_gist.c
@@ -8,6 +8,7 @@
 #include "access/stratnum.h"
 #include "catalog/pg_type.h"
 #include "hstore.h"
+#include "port/pg_bitutils.h"
 #include "utils/pg_crc.h"
 
 /* gist_hstore_ops opclass options */
@@ -256,20 +257,6 @@ sizebitvec(BITVECP sign, int siglen)
 	return size;
 }
 
-static int
-hemdistsign(BITVECP a, BITVECP b, int siglen)
-{
-	int			i,
-				dist = 0;
-
-	LOOPBIT(siglen)
-	{
-		if (GETBIT(a, i) != GETBIT(b, i))
-			dist++;
-	}
-	return dist;
-}
-
 static int
 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
 {
@@ -283,7 +270,7 @@ hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
 	else if (ISALLTRUE(b))
 		return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
 
-	return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
+	return pg_xorcount(GETSIGN(a), GETSIGN(b), siglen);
 }
 
 static int32
diff --git a/contrib/intarray/_intbig_gist.c b/contrib/intarray/_intbig_gist.c
index 18ecd8cda6..8aa76042b0 100644
--- a/contrib/intarray/_intbig_gist.c
+++ b/contrib/intarray/_intbig_gist.c
@@ -211,22 +211,6 @@ sizebitvec(BITVECP sign, int siglen)
 	return pg_popcount(sign, siglen);
 }
 
-static int
-hemdistsign(BITVECP a, BITVECP b, int siglen)
-{
-	int			i,
-				diff,
-				dist = 0;
-
-	LOOPBYTE(siglen)
-	{
-		diff = (unsigned char) (a[i] ^ b[i]);
-		/* Using the popcount functions here isn't likely to win */
-		dist += pg_number_of_ones[diff];
-	}
-	return dist;
-}
-
 static int
 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
 {
@@ -240,7 +224,7 @@ hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
 	else if (ISALLTRUE(b))
 		return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
 
-	return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
+	return pg_xorcount(GETSIGN(a), GETSIGN(b), siglen);
 }
 
 Datum
diff --git a/contrib/ltree/_ltree_gist.c b/contrib/ltree/_ltree_gist.c
index 72516c3b6b..85a840311e 100644
--- a/contrib/ltree/_ltree_gist.c
+++ b/contrib/ltree/_ltree_gist.c
@@ -180,22 +180,6 @@ sizebitvec(BITVECP sign, int siglen)
 	return pg_popcount((const char *) sign, siglen);
 }
 
-static int
-hemdistsign(BITVECP a, BITVECP b, int siglen)
-{
-	int			i,
-				diff,
-				dist = 0;
-
-	ALOOPBYTE(siglen)
-	{
-		diff = (unsigned char) (a[i] ^ b[i]);
-		/* Using the popcount functions here isn't likely to win */
-		dist += pg_number_of_ones[diff];
-	}
-	return dist;
-}
-
 static int
 hemdist(ltree_gist *a, ltree_gist *b, int siglen)
 {
@@ -209,7 +193,8 @@ hemdist(ltree_gist *a, ltree_gist *b, int siglen)
 	else if (LTG_ISALLTRUE(b))
 		return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(a), siglen);
 
-	return hemdistsign(LTG_SIGN(a), LTG_SIGN(b), siglen);
+	return pg_xorcount((const char *) LTG_SIGN(a), (const char *) LTG_SIGN(b),
+					   siglen);
 }
 
 
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index c09eefdda2..1659bc2727 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -486,22 +486,6 @@ sizebitvec(BITVECP sign, int siglen)
 	return pg_popcount(sign, siglen);
 }
 
-static int
-hemdistsign(BITVECP a, BITVECP b, int siglen)
-{
-	int			i,
-				diff,
-				dist = 0;
-
-	LOOPBYTE(siglen)
-	{
-		diff = (unsigned char) (a[i] ^ b[i]);
-		/* Using the popcount functions here isn't likely to win */
-		dist += pg_number_of_ones[diff];
-	}
-	return dist;
-}
-
 static int
 hemdist(SignTSVector *a, SignTSVector *b)
 {
@@ -520,7 +504,7 @@ hemdist(SignTSVector *a, SignTSVector *b)
 
 	Assert(siglena == siglenb);
 
-	return hemdistsign(GETSIGN(a), GETSIGN(b), siglena);
+	return pg_xorcount(GETSIGN(a), GETSIGN(b), siglena);
 }
 
 Datum
@@ -551,7 +535,7 @@ gtsvector_penalty(PG_FUNCTION_ARGS)
 				(float) (siglenbit + 1);
 		}
 		else
-			*penalty = hemdistsign(sign, orig, siglen);
+			*penalty = pg_xorcount(sign, orig, siglen);
 
 		pfree(sign);
 	}
@@ -611,7 +595,7 @@ hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen)
 	else if (b->allistrue)
 		return SIGLENBIT(siglen) - sizebitvec(a->sign, siglen);
 
-	return hemdistsign(a->sign, b->sign, siglen);
+	return pg_xorcount(a->sign, b->sign, siglen);
 }
 
 Datum
@@ -732,7 +716,7 @@ gtsvector_picksplit(PG_FUNCTION_ARGS)
 							   siglen);
 		}
 		else
-			size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l), siglen);
+			size_alpha = pg_xorcount(cache[j].sign, GETSIGN(datum_l), siglen);
 
 		if (ISALLTRUE(datum_r) || cache[j].allistrue)
 		{
@@ -746,7 +730,7 @@ gtsvector_picksplit(PG_FUNCTION_ARGS)
 							   siglen);
 		}
 		else
-			size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r), siglen);
+			size_beta = pg_xorcount(cache[j].sign, GETSIGN(datum_r), siglen);
 
 		if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
 		{
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index f9b77ec278..26c8c9e38e 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -214,6 +214,23 @@ extern int	(*pg_popcount64) (uint64 word);
 /* Count the number of one-bits in a byte array */
 extern uint64 pg_popcount(const char *buf, int bytes);
 
+/* Count the 1-bits in a XOR b over "bytes" bytes (the Hamming distance) */
+extern uint64 pg_xorcount_long(const char *a, const char *b, int bytes);
+static inline uint64 pg_xorcount(const char *a, const char *b, int bytes)
+{
+	/* At most 32 bytes: scan byte-by-byte, avoiding the out-of-line call */
+	if (bytes <= 32)
+	{
+		uint64		popcnt = 0;
+
+		while (bytes--)
+			popcnt += pg_number_of_ones[(unsigned char) (*a++ ^ *b++)];
+		return popcnt;
+	}
+	else
+		return pg_xorcount_long(a, b, bytes);
+}
+
 /*
  * Rotate the bits of "word" to the right by n bits.
  */
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index 2252021854..41a44b17a3 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -319,3 +319,37 @@ pg_popcount(const char *buf, int bytes)
 
 	return popcnt;
 }
+
+/*
+ * pg_xorcount_long
+ *		Count the 1-bits in the bytewise xor of a and b ("bytes" bytes each).
+ */
+uint64
+pg_xorcount_long(const char *a, const char *b, int bytes)
+{
+	uint64		popcnt = 0;
+
+#if SIZEOF_VOID_P >= 8
+	/* Take the 64-bit path only if both pointers are uint64-aligned. */
+	if (PointerIsAligned(a, uint64) && PointerIsAligned(b, uint64))
+	{
+		const uint64 *a_words = (const uint64 *) a;
+		const uint64 *b_words = (const uint64 *) b;
+
+		while (bytes >= 8)
+		{
+			popcnt += pg_popcount64(*a_words++ ^ *b_words++);
+			bytes -= 8;
+		}
+
+		a = (const char *) a_words;
+		b = (const char *) b_words;
+	}
+#endif
+
+	/* Byte-wise tail; handles the whole input if the fast path was skipped */
+	while (bytes--)
+		popcnt += pg_number_of_ones[(unsigned char) (*a++ ^ *b++)];
+
+	return popcnt;
+}
-- 
2.17.1

