From 119159103e1f7ce74afb7ccbd69355440ab76d36 Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Mon, 12 Sep 2022 09:44:06 +0200 Subject: [PATCH v12 3/4] Support load balancing in libpq This adds support for load balancing to libpq using the newly added load_balance_hosts parameter. When setting the load_balance_hosts parameter to random, hosts and addresses will be connected to in a random order. This then results in load balancing across these hosts/addresses if multiple clients do this at the same time. This patch implements two levels of random load balancing: 1. The given hosts are randomly shuffled, before resolving them one-by-one. 2. Once a host's addresses have been resolved, those addresses are shuffled, before trying to connect to them one-by-one. --- .cirrus.yml | 16 ++- doc/src/sgml/libpq.sgml | 69 ++++++++++ doc/src/sgml/regress.sgml | 11 +- src/interfaces/libpq/fe-connect.c | 118 ++++++++++++++++++ src/interfaces/libpq/libpq-int.h | 18 ++- src/interfaces/libpq/meson.build | 2 + .../libpq/t/003_loadbalance_host_list.pl | 76 +++++++++++ src/interfaces/libpq/t/004_loadbalance_dns.pl | 103 +++++++++++++++ 8 files changed, 410 insertions(+), 3 deletions(-) create mode 100644 src/interfaces/libpq/t/003_loadbalance_host_list.pl create mode 100644 src/interfaces/libpq/t/004_loadbalance_dns.pl diff --git a/.cirrus.yml b/.cirrus.yml index 505c50f3285..9f0f882c03c 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -25,7 +25,7 @@ env: MTEST_ARGS: --print-errorlogs --no-rebuild -C build PGCTLTIMEOUT: 120 # avoids spurious failures during parallel tests TEMP_CONFIG: ${CIRRUS_WORKING_DIR}/src/tools/ci/pg_ci_base.conf - PG_TEST_EXTRA: kerberos ldap ssl + PG_TEST_EXTRA: kerberos ldap ssl loadbalance # What files to preserve in case tests fail @@ -313,6 +313,14 @@ task: mkdir -m 770 /tmp/cores chown root:postgres /tmp/cores sysctl kernel.core_pattern='/tmp/cores/%e-%s-%p.core' + + setup_hosts_file_script: | + cat >> /etc/hosts <<-EOF + 127.0.0.1 pg-loadbalancetest + 127.0.0.2 
pg-loadbalancetest + 127.0.0.3 pg-loadbalancetest + EOF + setup_additional_packages_script: | #apt-get update #DEBIAN_FRONTEND=noninteractive apt-get -y install ... @@ -564,6 +572,12 @@ task: setup_additional_packages_script: | REM choco install -y --no-progress ... + setup_hosts_file_script: | + echo 127.0.0.1 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts + echo 127.0.0.2 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts + echo 127.0.0.3 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts + type c:\Windows\System32\Drivers\etc\hosts + # Use /DEBUG:FASTLINK to avoid high memory usage during linking configure_script: | vcvarsall x64 diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 3706d349abc..8ae168f5f7e 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -2069,6 +2069,75 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname + + + load_balance_hosts + + + Controls the order in which the client tries to connect to the available + hosts and addresses. It's typically used in combination with multiple + host names or a DNS record that returns multiple IPs. This parameter can + be used in combination with target_session_attrs + to, for example, load balance over standby servers only. Once successfully + connected, subsequent queries on the returned connection will all be + sent to the same server. There are currently two modes: + + + disable (default) + + + Hosts are tried in the order in which they are provided and + addresses are tried in the order they are received from DNS or a + hosts file. + + + + + + random + + + The provided hosts and the addresses that they resolve to are + tried in random order. This value is mostly useful when opening + multiple connections at the same time, possibly from different + machines. This way connections can be load balanced across multiple + Postgres servers. + + + This algorithm uses two levels of random choices: First the hosts + will be resolved in random order. 
Then before resolving the next + host, all resolved addresses for the current host will be tried in + random order. This behavior can lead to non-uniform address + selection in certain cases, for instance when some hosts resolve to + more addresses than others. So if you want uniform load balancing, + this is something to keep in mind. However, non-uniform load + balancing can also be used to your advantage, e.g. by providing the + hostname of a larger server multiple times in the host string so it + gets more connections. + + + When using this value it's recommended to also configure a reasonable + value for connect_timeout, so that + if one of the nodes that are used for load balancing is not responding, + a new node will be tried. + + + + + + + + + + random_seed + + + Sets the random seed that is used by load_balance_hosts + to randomize the host order. This option is mostly useful when running + tests that require a stable random order. + + + diff --git a/doc/src/sgml/regress.sgml b/doc/src/sgml/regress.sgml index 719e0a76985..8dda17b1362 100644 --- a/doc/src/sgml/regress.sgml +++ b/doc/src/sgml/regress.sgml @@ -256,7 +256,7 @@ make check-world -j8 >/dev/null PG_TEST_EXTRA to a whitespace-separated list, for example: -make check-world PG_TEST_EXTRA='kerberos ldap ssl' +make check-world PG_TEST_EXTRA='kerberos ldap ssl loadbalance' The following values are currently supported: @@ -290,6 +290,15 @@ make check-world PG_TEST_EXTRA='kerberos ldap ssl' + + loadbalance + + + Runs the test src/interfaces/libpq/t/004_loadbalance_dns.pl. This opens TCP/IP listen sockets. 
+ + + + wal_consistency_checking diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index b085892feac..806a9d69a2d 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -123,6 +123,7 @@ static int ldapServiceLookup(const char *purl, PQconninfoOption *options, #define DefaultChannelBinding "disable" #endif #define DefaultTargetSessionAttrs "any" +#define DefaultLoadBalanceHosts "disable" #ifdef USE_SSL #define DefaultSSLMode "prefer" #else @@ -345,6 +346,15 @@ static const internalPQconninfoOption PQconninfoOptions[] = { "Target-Session-Attrs", "", 15, /* sizeof("prefer-standby") = 15 */ offsetof(struct pg_conn, target_session_attrs)}, + {"load_balance_hosts", "PGLOADBALANCEHOSTS", + DefaultLoadBalanceHosts, NULL, + "Load-Balance-Hosts", "", 8, /* sizeof("disable") = 8 */ + offsetof(struct pg_conn, load_balance_hosts)}, + + {"random_seed", NULL, NULL, NULL, + "Random-Seed", "", 10, /* strlen(INT32_MAX) == 10 */ + offsetof(struct pg_conn, randomseed)}, + /* Terminating entry --- MUST BE LAST */ {NULL, NULL, NULL, NULL, NULL, NULL, 0} @@ -429,6 +439,8 @@ static void pgpassfileWarning(PGconn *conn); static void default_threadlock(int acquire); static bool sslVerifyProtocolVersion(const char *version); static bool sslVerifyProtocolRange(const char *min, const char *max); +static bool parse_int_param(const char *value, int *result, PGconn *conn, + const char *context); /* global variable because fe-auth.c needs to access it */ @@ -1013,6 +1025,40 @@ parse_comma_separated_list(char **startptr, bool *more) return p; } +/* + * Initializes the prng_state field of the connection. We want something + * unpredictable, so if possible, use high-quality random bits for the + * seed. Otherwise, fall back to a seed based on timestamp and PID. 
+ */ +static bool +libpq_prng_init(PGconn *conn) +{ + if (unlikely(conn->randomseed)) + { + int rseed; + + if (!parse_int_param(conn->randomseed, &rseed, conn, "random_seed")) + return false; + + pg_prng_seed(&conn->prng_state, rseed); + } + else if (unlikely(!pg_prng_strong_seed(&conn->prng_state))) + { + uint64 rseed; + struct timeval tval = {0}; + + gettimeofday(&tval, NULL); + + rseed = ((uint64) conn) ^ + ((uint64) getpid()) ^ + ((uint64) tval.tv_usec) ^ + ((uint64) tval.tv_sec); + + pg_prng_seed(&conn->prng_state, rseed); + } + return true; +} + /* * connectOptions2 * @@ -1570,6 +1616,50 @@ connectOptions2(PGconn *conn) else conn->target_server_type = SERVER_TYPE_ANY; + /* + * validate load_balance_hosts option, and set load_balance_type + */ + if (conn->load_balance_hosts) + { + if (strcmp(conn->load_balance_hosts, "disable") == 0) + conn->load_balance_type = LOAD_BALANCE_DISABLE; + else if (strcmp(conn->load_balance_hosts, "random") == 0) + conn->load_balance_type = LOAD_BALANCE_RANDOM; + else + { + conn->status = CONNECTION_BAD; + libpq_append_conn_error(conn, "invalid %s value: \"%s\"", + "load_balance_hosts", + conn->load_balance_hosts); + return false; + } + } + else + conn->load_balance_type = LOAD_BALANCE_DISABLE; + + if (conn->load_balance_type == LOAD_BALANCE_RANDOM) + { + if (!libpq_prng_init(conn)) + return false; + + /* + * This is the "inside-out" variant of the Fisher-Yates shuffle + * algorithm. Notionally, we append each new value to the array and + * then swap it with a randomly-chosen array element (possibly + * including itself, else we fail to generate permutations with the + * last integer last). The swap step can be optimized by combining it + * with the insertion. 
+ */ + for (i = 1; i < conn->nconnhost; i++) + { + int j = pg_prng_uint64_range(&conn->prng_state, 0, i); + pg_conn_host temp = conn->connhost[j]; + + conn->connhost[j] = conn->connhost[i]; + conn->connhost[i] = temp; + } + } + /* * Resolve special "auto" client_encoding from the locale */ @@ -2576,6 +2666,32 @@ keep_going: /* We will come back to here until there is } pg_freeaddrinfo_all(hint.ai_family, addrlist); + /* + * If random load balancing is enabled we shuffle the addresses. + */ + if (conn->load_balance_type == LOAD_BALANCE_RANDOM) + { + /* + * This is the "inside-out" variant of the Fisher-Yates shuffle + * algorithm. Notionally, we append each new value to the array + * and then swap it with a randomly-chosen array element (possibly + * including itself, else we fail to generate permutations with + * the last integer last). The swap step can be optimized by + * combining it with the insertion. + * + * We don't need to initialize conn->prng_state here, because that + * already happened in connectOptions2. 
+ */ + for (int i = 1; i < conn->naddr; i++) + { + int j = pg_prng_uint64_range(&conn->prng_state, 0, i); + AddrInfo temp = conn->addr[j]; + + conn->addr[j] = conn->addr[i]; + conn->addr[i] = temp; + } + } + reset_connection_state_machine = true; conn->try_next_host = false; } @@ -4244,6 +4360,8 @@ freePGconn(PGconn *conn) free(conn->outBuffer); free(conn->rowBuf); free(conn->target_session_attrs); + free(conn->load_balance_hosts); + free(conn->randomseed); termPQExpBuffer(&conn->errorMessage); termPQExpBuffer(&conn->workBuffer); diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index cf10ea15aa1..f8e301ae335 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -26,7 +26,8 @@ #include #include #include -#ifndef WIN32 +/* MinGW has sys/time.h, but MSVC doesn't */ +#ifndef _MSC_VER #include #endif @@ -82,6 +83,8 @@ typedef struct #endif #endif /* USE_OPENSSL */ +#include "common/pg_prng.h" + /* * POSTGRES backend dependent Constants. 
*/ @@ -242,6 +245,13 @@ typedef enum SERVER_TYPE_PREFER_STANDBY_PASS2 /* second pass - behaves same as ANY */ } PGTargetServerType; +/* Load balancing type (decoded value of load_balance_hosts) */ +typedef enum +{ + LOAD_BALANCE_DISABLE = 0, /* Use the existing host order (default) */ + LOAD_BALANCE_RANDOM, /* Randomly shuffle the hosts and addresses */ +} PGLoadBalanceType; + /* Boolean value plus a not-known state, for GUCs we might have to fetch */ typedef enum { @@ -397,6 +407,8 @@ struct pg_conn char *ssl_max_protocol_version; /* maximum TLS protocol version */ char *target_session_attrs; /* desired session properties */ char *require_auth; /* name of the expected auth method */ + char *load_balance_hosts; /* load balance over hosts */ + char *randomseed; /* seed for randomization of load balancing */ /* Optional file to write trace info to */ FILE *Pfdebug; @@ -468,6 +480,8 @@ struct pg_conn /* Transient state needed while establishing connection */ PGTargetServerType target_server_type; /* desired session properties */ + PGLoadBalanceType load_balance_type; /* desired load balancing + * algorithm */ bool try_next_addr; /* time to advance to next address/host? */ bool try_next_host; /* time to advance to next connhost[]? 
*/ int naddr; /* number of addresses returned by getaddrinfo */ @@ -488,6 +502,8 @@ struct pg_conn PGVerbosity verbosity; /* error/notice message verbosity */ PGContextVisibility show_context; /* whether to show CONTEXT field */ PGlobjfuncs *lobjfuncs; /* private state for large-object access fns */ + pg_prng_state prng_state; /* prng state for load balancing connections */ + /* Buffer for data received from backend and not yet processed */ char *inBuffer; /* currently allocated buffer */ diff --git a/src/interfaces/libpq/meson.build b/src/interfaces/libpq/meson.build index 3cd0ddb4945..1b44d49a238 100644 --- a/src/interfaces/libpq/meson.build +++ b/src/interfaces/libpq/meson.build @@ -116,6 +116,8 @@ tests += { 'tests': [ 't/001_uri.pl', 't/002_api.pl', + 't/003_loadbalance_host_list.pl', + 't/004_loadbalance_dns.pl', ], 'env': {'with_ssl': ssl_library}, }, diff --git a/src/interfaces/libpq/t/003_loadbalance_host_list.pl b/src/interfaces/libpq/t/003_loadbalance_host_list.pl new file mode 100644 index 00000000000..547b7d34fa4 --- /dev/null +++ b/src/interfaces/libpq/t/003_loadbalance_host_list.pl @@ -0,0 +1,76 @@ +# Copyright (c) 2023, PostgreSQL Global Development Group +use strict; +use warnings; +use Config; +use PostgreSQL::Test::Utils; +use PostgreSQL::Test::Cluster; +use Test::More; + +# This tests load balancing across the list of different hosts in the host +# parameter of the connection string. + +# Cluster setup which is shared for testing both load balancing methods +my $node1 = PostgreSQL::Test::Cluster->new('node1'); +my $node2 = PostgreSQL::Test::Cluster->new('node2', own_host => 1); +my $node3 = PostgreSQL::Test::Cluster->new('node3', own_host => 1); + +# Create a data directory with initdb +$node1->init(); +$node2->init(); +$node3->init(); + +# Start the PostgreSQL server +$node1->start(); +$node2->start(); +$node3->start(); + +# Start the tests for load balancing method 1 +my $hostlist = $node1->host . ',' . $node2->host . ',' . 
$node3->host; +my $portlist = $node1->port . ',' . $node2->port . ',' . $node3->port; + +$node1->connect_ok("host=$hostlist port=$portlist load_balance_hosts=random random_seed=1234", + "seed 1234 selects node 1 first", + sql => "SELECT 'connect1'", + log_like => [qr/statement: SELECT 'connect1'/]); + +$node2->connect_ok("host=$hostlist port=$portlist load_balance_hosts=random random_seed=1234", + "seed 1234 does not select node 2 first", + sql => "SELECT 'connect1'", + log_unlike => [qr/statement: SELECT 'connect1'/]); + +$node3->connect_ok("host=$hostlist port=$portlist load_balance_hosts=random random_seed=1234", + "seed 1234 does not select node 3 first", + sql => "SELECT 'connect1'", + log_unlike => [qr/statement: SELECT 'connect1'/]); + +$node3->connect_ok("host=$hostlist port=$portlist load_balance_hosts=random random_seed=42", + "seed 42 selects node 3 first", + sql => "SELECT 'connect2'", + log_like => [qr/statement: SELECT 'connect2'/]); + +$node1->connect_ok("host=$hostlist port=$portlist load_balance_hosts=random random_seed=42", + "seed 42 does not select node 1 first", + sql => "SELECT 'connect2'", + log_unlike => [qr/statement: SELECT 'connect2'/]); + +$node2->connect_ok("host=$hostlist port=$portlist load_balance_hosts=random random_seed=42", + "seed 42 does not select node 2 first", + sql => "SELECT 'connect2'", + log_unlike => [qr/statement: SELECT 'connect2'/]); + +$node3->stop(); + +$node1->connect_ok("host=$hostlist port=$portlist load_balance_hosts=random random_seed=42", + "seed 42 does select node 1 second", + sql => "SELECT 'connect3'", + log_like => [qr/statement: SELECT 'connect3'/]); + +$node2->connect_ok("host=$hostlist port=$portlist load_balance_hosts=random random_seed=42", + "seed 42 does not select node 2 second", + sql => "SELECT 'connect3'", + log_unlike => [qr/statement: SELECT 'connect3'/]); + +$node3->start(); + +done_testing(); + diff --git a/src/interfaces/libpq/t/004_loadbalance_dns.pl 
b/src/interfaces/libpq/t/004_loadbalance_dns.pl new file mode 100644 index 00000000000..2512c41c466 --- /dev/null +++ b/src/interfaces/libpq/t/004_loadbalance_dns.pl @@ -0,0 +1,103 @@ +# Copyright (c) 2023, PostgreSQL Global Development Group +use strict; +use warnings; +use Config; +use PostgreSQL::Test::Utils; +use PostgreSQL::Test::Cluster; +use Test::More; + +# This tests load balancing based on a DNS entry that contains multiple records +# for different IPs. Since setting up a DNS server is more effort than we +# consider reasonable to run this test, this situation is instead imitated by +# using a hosts file where a single hostname maps to multiple different IP +# addresses. This test requires the administrator to add the following lines to +# the hosts file (if we detect that this hasn't happened we skip the test): +# +# 127.0.0.1 pg-loadbalancetest +# 127.0.0.2 pg-loadbalancetest +# 127.0.0.3 pg-loadbalancetest +# +# Windows or Linux are required to run this test because these OSes allow +# binding to 127.0.0.2 and 127.0.0.3 addresses by default, but other OSes +# don't. We need to bind to different IP addresses, so that we can use these +# different IP addresses in the hosts file. +# +# The hosts file needs to be prepared before running this test. We don't do it +# on the fly, because it requires root permissions to change the hosts file. In +# CI we set up the previously mentioned rules in the hosts file, so that this +# load balancing method is tested. 
+
+# This test opens TCP/IP listen sockets, so it only runs when explicitly
+# enabled. Checking this first, and tolerating an unset PG_TEST_EXTRA, lets a
+# default run skip without warnings and without reading the hosts file.
+if (!$ENV{PG_TEST_EXTRA} || $ENV{PG_TEST_EXTRA} !~ /\bloadbalance\b/)
+{
+	plan skip_all =>
+	  'Potentially unsafe test loadbalance not enabled in PG_TEST_EXTRA';
+}
+
+# Only Linux and Windows allow binding to 127.0.0.2 and 127.0.0.3 by default.
+if ($Config{osname} ne 'linux' && !$windows_os)
+{
+	plan skip_all => "OS could not bind to 127.0.0.2";
+}
+
+# Skip unless the hosts file maps pg-loadbalancetest to all three addresses.
+my $hosts_path =
+  $windows_os ? 'c:\Windows\System32\Drivers\etc\hosts' : '/etc/hosts';
+my $hosts_content = PostgreSQL::Test::Utils::slurp_file($hosts_path);
+foreach my $addr (qw(127.0.0.1 127.0.0.2 127.0.0.3))
+{
+	next if $hosts_content =~ m/^[ \t]*\Q$addr\E[ \t]+.*\bpg-loadbalancetest\b/m;
+	plan skip_all => "hosts file was not prepared for DNS load balance test";
+}
+
+# All nodes share one port; node2 and node3 are created with own_host, which
+# presumably gives each its own 127.0.0.x listen address (see Cluster.pm).
+$PostgreSQL::Test::Cluster::use_tcp = 1;
+$PostgreSQL::Test::Cluster::test_pghost = '127.0.0.1';
+my $port = PostgreSQL::Test::Cluster::get_free_port();
+my $node1 = PostgreSQL::Test::Cluster->new('node1', port => $port);
+my $node2 = PostgreSQL::Test::Cluster->new('node2', port => $port, own_host => 1);
+my $node3 = PostgreSQL::Test::Cluster->new('node3', port => $port, own_host => 1);
+
+# Create a data directory with initdb
+$node1->init();
+$node2->init();
+$node3->init();
+
+# Start the PostgreSQL server
+$node1->start();
+$node2->start();
+$node3->start();
+
+# With all three nodes up, seed 33 is expected to reach node2 first.
+$node2->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=random random_seed=33",
+	"seed 33 selects node 2 first",
+	sql => "SELECT 'connect4'",
+	log_like => [qr/statement: SELECT 'connect4'/]);
+
+$node1->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=random random_seed=33",
+	"seed 33 does not select node 1 first",
+	sql => "SELECT 'connect4'",
+	log_unlike => [qr/statement: SELECT 'connect4'/]);
+
+$node3->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=random random_seed=33",
+	"seed 33 does not select node 3 first",
+	sql => "SELECT 'connect4'",
+	log_unlike => [qr/statement: SELECT 'connect4'/]);
+
+# With node2 stopped, the same seed is expected to fall back to node1.
+$node2->stop();
+
+$node1->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=random random_seed=33",
+	"seed 33 does select node 1 second",
+	sql => "SELECT 'connect5'",
+	log_like => [qr/statement: SELECT 'connect5'/]);
+
+$node3->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=random random_seed=33",
+	"seed 33 does not select node 3 second",
+	sql => "SELECT 'connect5'",
+	log_unlike => [qr/statement: SELECT 'connect5'/]);
+
+done_testing();
-- 
2.34.1