Re: Optimize postgres protocol for fixed size arrays

From: Merlin Moncure <mmoncure(at)gmail(dot)com>
To: Mikko Tiihonen <mikko(dot)tiihonen(at)nitorcreations(dot)com>
Cc: pgsql-jdbc(at)postgresql(dot)org, pgsql-hackers(at)postgresql(dot)org
Subject: Re: Optimize postgres protocol for fixed size arrays
Date: 2011-11-22 22:46:24
Message-ID: CAHyXU0yfriOfd3GGd1n2JbwqpcehzczGUTE1CyD7GmcF+rGKsg@mail.gmail.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers pgsql-jdbc

On Tue, Nov 22, 2011 at 3:47 PM, Mikko Tiihonen
<mikko(dot)tiihonen(at)nitorcreations(dot)com> wrote:
> Hi,
>
> During conversion of the jdbc driver to use binary encoding when receiving
> array objects from postgres it was noticed
> that for example for int[] arrays the binary encoding is normally 30% to
> 200% larger in bytes than the text encoding.
>
> This was of concern to some users with slower links to database. Even though
> the encoded size was larger the binary
> encoding was still constantly faster (with 50% speed up for float[]).
>
> Here is a patch that adds a new flag to the protocol that is set when all
> elements of the array are of same fixed size.
> When the bit is set the 4 byte length is only sent once and not for each
> element. Another restriction is that the flag
> can only be set when there are no NULLs in the array.
>
> The postgres part is my first try at the problem and I really need some
> feedback around the detection of fixed size
> elements. I just made a guess and noticed that typlen != 0 seemed to work
> for the basic types I user for testing.
>
> First the postgres patch:
>
> diff --git a/src/backend/utils/adt/arrayfuncs.c
> b/src/backend/utils/adt/arrayfuncs.c
> index bfb6065..970272f 100644
> --- a/src/backend/utils/adt/arrayfuncs.c
> +++ b/src/backend/utils/adt/arrayfuncs.c
> @@ -86,7 +86,7 @@ static void ReadArrayBinary(StringInfo buf, int nitems,
>                                FmgrInfo *receiveproc, Oid typioparam, int32
> typmod,
>                                int typlen, bool typbyval, char typalign,
>                                Datum *values, bool *nulls,
> -                               bool *hasnulls, int32 *nbytes);
> +                               bool *hasnulls, int32 *nbytes, bool
> fixedlen);
>  static void CopyArrayEls(ArrayType *array,
>                         Datum *values, bool *nulls, int nitems,
>                         int typlen, bool typbyval, char typalign,
> @@ -1242,7 +1242,7 @@ array_recv(PG_FUNCTION_ARGS)
>                                                ndim, MAXDIM)));
>
>        flags = pq_getmsgint(buf, 4);
> -       if (flags != 0 && flags != 1)
> +       if ((flags & ~3) != 0)
>                ereport(ERROR,
>
>  (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
>                                 errmsg("invalid array flags")));
> @@ -1327,7 +1327,7 @@ array_recv(PG_FUNCTION_ARGS)
>                                        &my_extra->proc, typioparam, typmod,
>                                        typlen, typbyval, typalign,
>                                        dataPtr, nullsPtr,
> -                                       &hasnulls, &nbytes);
> +                                       &hasnulls, &nbytes, (flags & 2) !=
> 0);
>        if (hasnulls)
>        {
>                dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems);
> @@ -1390,7 +1390,8 @@ ReadArrayBinary(StringInfo buf,
>                                Datum *values,
>                                bool *nulls,
>                                bool *hasnulls,
> -                               int32 *nbytes)
> +                               int32 *nbytes,
> +                               bool fixedlen)
>  {
>        int                     i;
>        bool            hasnull;
> @@ -1403,7 +1404,7 @@ ReadArrayBinary(StringInfo buf,
>                char            csave;
>
>                /* Get and check the item length */
> -               itemlen = pq_getmsgint(buf, 4);
> +               itemlen = fixedlen ? typlen : pq_getmsgint(buf, 4);
>                if (itemlen < -1 || itemlen > (buf->len - buf->cursor))
>                        ereport(ERROR,
>
>  (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
> @@ -1496,6 +1497,7 @@ array_send(PG_FUNCTION_ARGS)
>        int                     bitmask;
>        int                     nitems,
>                                i;
> +       int                     flags;
>        int                     ndim,
>                           *dim;
>        StringInfoData buf;
> @@ -1535,6 +1537,8 @@ array_send(PG_FUNCTION_ARGS)
>        typbyval = my_extra->typbyval;
>        typalign = my_extra->typalign;
>
> +       flags = ARR_HASNULL(v) ? 1 : (typlen > 0 ? 2 : 0);
> +
>        ndim = ARR_NDIM(v);
>        dim = ARR_DIMS(v);
>        nitems = ArrayGetNItems(ndim, dim);
> @@ -1543,7 +1547,7 @@ array_send(PG_FUNCTION_ARGS)
>
>        /* Send the array header information */
>        pq_sendint(&buf, ndim, 4);
> -       pq_sendint(&buf, ARR_HASNULL(v) ? 1 : 0, 4);
> +       pq_sendint(&buf, flags, 4);
>        pq_sendint(&buf, element_type, sizeof(Oid));
>        for (i = 0; i < ndim; i++)
>        {
> @@ -1571,7 +1575,8 @@ array_send(PG_FUNCTION_ARGS)
>
>                        itemvalue = fetch_att(p, typbyval, typlen);
>                        outputbytes = SendFunctionCall(&my_extra->proc,
> itemvalue);
> -                       pq_sendint(&buf, VARSIZE(outputbytes) - VARHDRSZ,
> 4);
> +                       if ((flags & 2) == 0)
> +                               pq_sendint(&buf, VARSIZE(outputbytes) -
> VARHDRSZ, 4);
>                        pq_sendbytes(&buf, VARDATA(outputbytes),
>                                                 VARSIZE(outputbytes) -
> VARHDRSZ);
>                        pfree(outputbytes);
>
>
>
>
> And here is the matching jdbc driver patch (similar changes need to be done
> to other drivers too):
>
> Index: org/postgresql/jdbc2/AbstractJdbc2Array.java
> ===================================================================
> RCS file:
> /cvsroot/jdbc/pgjdbc/org/postgresql/jdbc2/AbstractJdbc2Array.java,v
> retrieving revision 1.29
> diff -u -r1.29 AbstractJdbc2Array.java
> --- org/postgresql/jdbc2/AbstractJdbc2Array.java        30 Sep 2011 10:08:17
> -0000      1.29
> +++ org/postgresql/jdbc2/AbstractJdbc2Array.java        22 Nov 2011 21:06:15
> -0000
> @@ -175,7 +175,7 @@
>
>     private Object readBinaryArray(int index, int count) throws SQLException
> {
>         int dimensions = ByteConverter.int4(fieldBytes, 0);
> -        //int flags = ByteConverter.int4(fieldBytes, 4); // bit 0:
> 0=no-nulls, 1=has-nulls
> +        int flags = ByteConverter.int4(fieldBytes, 4); // bit 0:
> 0=no-nulls, 1=has-nulls, bit 1: 0=variable-size objects, 2=fixed-size
>         int elementOid = ByteConverter.int4(fieldBytes, 8);
>         int pos = 12;
>         int[] dims = new int[dimensions];
> @@ -190,8 +190,12 @@
>             dims[0] = Math.min(count, dims[0]);
>         }
>         Object arr =
> java.lang.reflect.Array.newInstance(elementOidToClass(elementOid), dims);
> +        int len = -1;
> +        if ((flags & 2) != 0) {
> +            len = fixedBinaryLengthOfOid(elementOid);
> +        }
>         try {
> -            storeValues((Object[]) arr, elementOid, dims, pos, 0, index);
> +            storeValues((Object[]) arr, elementOid, dims, len, pos, 0,
> index);
>         }
>         catch (IOException ioe)
>         {
> @@ -200,18 +204,27 @@
>         return arr;
>     }
>
> -    private int storeValues(final Object[] arr, int elementOid, final int[]
> dims, int pos, final int thisDimension, int index) throws SQLException,
> IOException {
> +    private int storeValues(final Object[] arr, int elementOid, final int[]
> dims, int len, int pos, final int thisDimension, int index) throws
> SQLException, IOException {
>         if (thisDimension == dims.length - 1) {
> -            for (int i = 1; i < index; ++i) {
> -                int len = ByteConverter.int4(fieldBytes, pos); pos += 4;
> -                if (len != -1) {
> -                    pos += len;
> +            if (len > 0) {
> +                pos += len * (index - 1);
> +            } else {
> +                for (int i = 1; i < index; ++i) {
> +                    int l = ByteConverter.int4(fieldBytes, pos); pos += 4;
> +                    if (l != -1) {
> +                        pos += l;
> +                    }
>                 }
>             }
>             for (int i = 0; i < dims[thisDimension]; ++i) {
> -                int len = ByteConverter.int4(fieldBytes, pos); pos += 4;
> -                if (len == -1) {
> -                    continue;
> +                int l;
> +                if (len > 0) {
> +                    l = len;
> +                } else {
> +                    l = ByteConverter.int4(fieldBytes, pos); pos += 4;
> +                    if (l == -1) {
> +                        continue;
> +                    }
>                 }
>                 switch (elementOid) {
>                 case Oid.INT2:
> @@ -232,14 +245,14 @@
>                 case Oid.TEXT:
>                 case Oid.VARCHAR:
>                     Encoding encoding = connection.getEncoding();
> -                    arr[i] = encoding.decode(fieldBytes, pos, len);
> +                    arr[i] = encoding.decode(fieldBytes, pos, l);
>                     break;
>                 }
> -                pos += len;
> +                pos += l;
>             }
>         } else {
>             for (int i = 0; i < dims[thisDimension]; ++i) {
> -                pos = storeValues((Object[]) arr[i], elementOid, dims, pos,
> thisDimension + 1, 0);
> +                pos = storeValues((Object[]) arr[i], elementOid, dims, len,
> pos, thisDimension + 1, 0);
>             }
>         }
>         return pos;
> @@ -358,6 +371,27 @@
>         }
>     }
>
> +    private int fixedBinaryLengthOfOid(int oid) throws SQLException {
> +        switch (oid) {
> +        case Oid.INT2:
> +            return 2;
> +        case Oid.INT4:
> +            return 4;
> +        case Oid.INT8:
> +            return 8;
> +        case Oid.FLOAT4:
> +            return 4;
> +        case Oid.FLOAT8:
> +            return 8;
> +        case Oid.TEXT:
> +        case Oid.VARCHAR:
> +            return -1;
> +        default:
> +            throw org.postgresql.Driver.notImplemented(this.getClass(),
> +                    "readBinaryArray(data,oid)");
> +        }
> +    }
> +
>     /**
>      * Build {(at)link ArrayList} from field's string input. As a result
>      * of this method {(at)link #arrayList} is build. Method can be called

+1. just be advised that this is a significant change to the binary
wire format and will cause headaches to those that are using it (you
are supposed to expect those headaches though -- using the wire format
is analogous to attending a rock concert). maybe this would be an
appropriate place to discuss a well advertised place in the
documentation that describes changes to the wire format?

libpqtypes users of course will not have to worry about this because
the format change will be handled in the library.

merlin

In response to

Browse pgsql-hackers by date

  From Date Subject
Next Message magtiki2@juno.com 2011-11-22 23:19:46 xpath_table
Previous Message Kevin Grittner 2011-11-22 22:35:51 Re: FlexLocks

Browse pgsql-jdbc by date

  From Date Subject
Next Message Oliver Jowett 2011-11-22 23:41:46 Re: [JDBC] Optimize postgres protocol for fixed size arrays
Previous Message ktm@rice.edu 2011-11-22 21:58:06 Re: Optimize postgres protocol for fixed size arrays