From d50ae41bf1d1c4e2786a55a610300606c1ec43a5 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Sat, 6 Jul 2019 15:41:37 +0200 Subject: [PATCH 1/2] Optimize partial TOAST decompression --- src/backend/access/heap/tuptoaster.c | 57 ++++++++++++++++++++++------ src/common/pg_lzcompress.c | 26 +++++++++++++ src/include/common/pg_lzcompress.h | 1 + 3 files changed, 72 insertions(+), 12 deletions(-) diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 55d6e91d1c..1eb6ccaca2 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -61,7 +61,8 @@ typedef struct toast_compress_header */ #define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header)) #define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize) -#define TOAST_COMPRESS_RAWDATA(ptr) \ +#define TOAST_COMPRESS_SIZE(ptr) ((int32) VARSIZE(ptr) - TOAST_COMPRESS_HDRSZ) +#define TOAST_COMPRESS_DATA(ptr) \ (((char *) (ptr)) + TOAST_COMPRESS_HDRSZ) #define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \ (((toast_compress_header *) (ptr))->rawsize = (len)) @@ -252,6 +253,8 @@ heap_tuple_untoast_attr(struct varlena *attr) * * Public entry point to get back part of a toasted value * from compression or external storage. + * + * Note if slicelength is negative, return suffix of the value. * ---------- */ struct varlena * @@ -273,8 +276,23 @@ heap_tuple_untoast_attr_slice(struct varlena *attr, if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) return toast_fetch_datum_slice(attr, sliceoffset, slicelength); - /* fetch it back (compressed marker will get set automatically) */ - preslice = toast_fetch_datum(attr); + /* + * If only need the prefix slice, fetch enough datums to decompress. + * Otherwise, fetch all datums. + */ + if (slicelength > 0 && sliceoffset >= 0) + { + int32 max_size; + max_size = pglz_maximum_compressed_size(sliceoffset + slicelength, + TOAST_COMPRESS_SIZE(attr)); + /* + * Be sure to get enough compressed slice + * and compressed marker will get set automatically + */ + preslice = toast_fetch_datum_slice(attr, 0, max_size); + } + else + preslice = toast_fetch_datum(attr); } else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) { @@ -1391,7 +1409,7 @@ toast_compress_datum(Datum value) */ len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)), valsize, - TOAST_COMPRESS_RAWDATA(tmp), + TOAST_COMPRESS_DATA(tmp), PGLZ_strategy_default); if (len >= 0 && len + TOAST_COMPRESS_HDRSZ < valsize - 2) @@ -2031,7 +2049,8 @@ toast_fetch_datum(struct varlena *attr) * Reconstruct a segment of a Datum from the chunks saved * in the toast relation * - * Note that this function only supports non-compressed external datums. + * Note that this function supports non-compressed external datums + * and compressed external datum slices at the start of the object. * ---------- */ static struct varlena * @@ -2072,10 +2091,10 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); /* - * It's nonsense to fetch slices of a compressed datum -- this isn't lo_* - * we can't return a compressed datum which is meaningful to toast later + * It's meaningful to fetch slices at the start of a compressed datum, + * since we can decompress the prefix raw data from it. */ - Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); + Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset); attrsize = toast_pointer.va_extsize; totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; @@ -2086,12 +2105,26 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) length = 0; } + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && length > 0) + { + /* + * If we are going to fetch the compressed external datum slice + * at the start of the object, also should fetch rawsize field used + * to record the size of the raw data. + */ + length = length + sizeof(int32); + } + if (((sliceoffset + length) > attrsize) || length < 0) length = attrsize - sliceoffset; result = (struct varlena *) palloc(length + VARHDRSZ); - SET_VARSIZE(result, length + VARHDRSZ); + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) { + SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ); + } else { + SET_VARSIZE(result, length + VARHDRSZ); + } if (length == 0) return result; /* Can save a lot of work at this point! */ @@ -2274,8 +2307,8 @@ toast_decompress_datum(struct varlena *attr) palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ); SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ); - if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr), - VARSIZE(attr) - TOAST_COMPRESS_HDRSZ, + if (pglz_decompress(TOAST_COMPRESS_DATA(attr), + TOAST_COMPRESS_SIZE(attr), VARDATA(result), TOAST_COMPRESS_RAWSIZE(attr), true) < 0) elog(ERROR, "compressed data is corrupted"); @@ -2301,7 +2334,7 @@ toast_decompress_datum_slice(struct varlena *attr, int32 slicelength) result = (struct varlena *) palloc(slicelength + VARHDRSZ); - rawsize = pglz_decompress(TOAST_COMPRESS_RAWDATA(attr), + rawsize = pglz_decompress(TOAST_COMPRESS_DATA(attr), VARSIZE(attr) - TOAST_COMPRESS_HDRSZ, VARDATA(result), slicelength, false); diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c index 988b3987d0..ac7d66db77 100644 --- a/src/common/pg_lzcompress.c +++ b/src/common/pg_lzcompress.c @@ -771,3 +771,29 @@ pglz_decompress(const char *source, int32 slen, char *dest, */ return (char *) dp - dest; } + + + +/* ---------- + * pglz_max_compressed_size - + * + * Calculate the maximum size of the compressed slice corresponding to the + * raw slice. Return the maximum size, or total compressed size if maximum + * size is larger than total compressed size. + * ---------- + */ +int32 +pglz_maximum_compressed_size(int32 raw_slice_size, int32 total_compressed_size) +{ + int32 result; + + /* + * Use int64 to prevent overflow during calculation. + */ + result = (int32)((int64)raw_slice_size * 9 + 8) / 8; + + /* + * Note that slice compressed size will never be larger than total compressed size. + */ + return result > total_compressed_size ? total_compressed_size: result; +} diff --git a/src/include/common/pg_lzcompress.h b/src/include/common/pg_lzcompress.h index 555576436c..cda3e1d364 100644 --- a/src/include/common/pg_lzcompress.h +++ b/src/include/common/pg_lzcompress.h @@ -87,5 +87,6 @@ extern int32 pglz_compress(const char *source, int32 slen, char *dest, const PGLZ_Strategy *strategy); extern int32 pglz_decompress(const char *source, int32 slen, char *dest, int32 rawsize, bool check_complete); +extern int32 pglz_maximum_compressed_size(int32 raw_slice_size, int32 raw_size); #endif /* _PG_LZCOMPRESS_H_ */ -- 2.20.1