diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index 00cab73..12c1338 100644
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -145,6 +145,9 @@ char *index_tablespace = NULL;
#define naccounts 100000
bool use_log; /* log transaction latencies to a file */
+bool use_log_sampling; /* sample the log randomly */
+int nsample_rate = 100; /* default log sampling rate */
+
bool is_connect; /* establish connection for each transaction */
bool is_latencies; /* report per-command latencies */
int main_pid; /* main process id used in log filename */
@@ -364,6 +367,7 @@ usage(void)
" -f FILENAME read transaction script from FILENAME\n"
" -j NUM number of threads (default: 1)\n"
" -l write transaction times to log file\n"
+ " -R NUM log sampling rate in pct (default: 100)\n"
" -M simple|extended|prepared\n"
" protocol for submitting queries to server (default: simple)\n"
" -n do not run VACUUM before tests\n"
@@ -877,21 +881,25 @@ top:
instr_time diff;
double usec;
- INSTR_TIME_SET_CURRENT(now);
- diff = now;
- INSTR_TIME_SUBTRACT(diff, st->txn_begin);
- usec = (double) INSTR_TIME_GET_MICROSEC(diff);
+ /* either no sampling or is within the sample */
+ if ((! use_log_sampling) || (getrand(thread, 0, 99) < nsample_rate)) {
+
+ INSTR_TIME_SET_CURRENT(now);
+ diff = now;
+ INSTR_TIME_SUBTRACT(diff, st->txn_begin);
+ usec = (double) INSTR_TIME_GET_MICROSEC(diff);
#ifndef WIN32
- /* This is more than we really ought to know about instr_time */
- fprintf(logfile, "%d %d %.0f %d %ld %ld\n",
- st->id, st->cnt, usec, st->use_file,
- (long) now.tv_sec, (long) now.tv_usec);
+ /* This is more than we really ought to know about instr_time */
+ fprintf(logfile, "%d %d %.0f %d %ld %ld\n",
+ st->id, st->cnt, usec, st->use_file,
+ (long) now.tv_sec, (long) now.tv_usec);
#else
- /* On Windows, instr_time doesn't provide a timestamp anyway */
- fprintf(logfile, "%d %d %.0f %d 0 0\n",
- st->id, st->cnt, usec, st->use_file);
+ /* On Windows, instr_time doesn't provide a timestamp anyway */
+ fprintf(logfile, "%d %d %.0f %d 0 0\n",
+ st->id, st->cnt, usec, st->use_file);
#endif
+ }
}
if (commands[st->state]->type == SQL_COMMAND)
@@ -1962,7 +1970,7 @@ main(int argc, char **argv)
state = (CState *) xmalloc(sizeof(CState));
memset(state, 0, sizeof(CState));
- while ((c = getopt_long(argc, argv, "ih:nvp:dSNc:j:Crs:t:T:U:lf:D:F:M:", long_options, &optindex)) != -1)
+ while ((c = getopt_long(argc, argv, "ih:nvp:dSNc:j:Crs:t:T:U:lf:R:D:F:M:", long_options, &optindex)) != -1)
{
switch (c)
{
@@ -2070,6 +2078,15 @@ main(int argc, char **argv)
case 'l':
use_log = true;
break;
+ case 'R':
+ use_log_sampling = true;
+ nsample_rate = atoi(optarg);
+ if (nsample_rate <= 0 || nsample_rate > 100)
+ {
+ fprintf(stderr, "invalid sampling rate: %d\n", nsample_rate);
+ exit(1);
+ }
+ break;
case 'f':
ttype = 3;
filename = optarg;
@@ -2158,6 +2175,12 @@ main(int argc, char **argv)
exit(1);
}
+ /* -R may be used only with -l */
+ if (use_log_sampling && (! use_log)) {
+ fprintf(stderr, "log sampling rate is allowed only when logging transactions\n");
+ exit(1);
+ }
+
/*
* is_latencies only works with multiple threads in thread-based
* implementations, not fork-based ones, because it supposes that the
diff --git a/doc/src/sgml/pgbench.sgml b/doc/src/sgml/pgbench.sgml
index 437fcea..962e446 100644
--- a/doc/src/sgml/pgbench.sgml
+++ b/doc/src/sgml/pgbench.sgml
@@ -317,6 +317,22 @@ pgbench options> dbname>
+ rate>
+
+
+ Sampling rate, used when writing data into the log in percent. 100 means all
+ transactions will be logged, 1 means only 1% of the transactions will be logged.
+ Default is 100 (all transactions).
+
+
+ Be careful when processing the log file - e.g. when computing tps values, you
+ need to multiply the numbers accordingly (e.g. with 1% sample you'll get 1/100
+ of the actual tps).
+
+
+
+
+
querymode>