shardmanctl
shardmanctl — Shardman auxiliary command-line client and deployment tool
Synopsis
shardmanctl [common_options] backup --datadir directory [--maxtasks number_of_tasks] [--use-ssh]
shardmanctl [common_options] daemon check -n|--nodes node_names:port
shardmanctl [common_options] cleanup [-p|--processrepgroups] --after-node-operation|--after-rebalance
shardmanctl [common_options] config generate [-f|--file filename]
shardmanctl [common_options] config verify [-f|--file filename]
shardmanctl [common_options] config get [-f|--file] [-c|--choose-revision] [-r|--revision]
shardmanctl [common_options] config revisions rm [-r|--revision] [-y|--yes]
shardmanctl [common_options] config update [[-f|--file stolon_spec_file|shardman_spec_file] | spec_text] [--force] [-p|--patch] [-w|--wait time_duration]
shardmanctl [common_options] config rollback [-r|--revision] [-w|--wait time_duration] [--force]
shardmanctl [common_options] config update credentials [-u|--user] [-p|--password] [-k|--ssl-key] [-c|--ssl-cert] [-w|--wait time_duration] [-f|--force] [-y|--yes]
shardmanctl [common_options] config revisions [-f|--format json|text]
shardmanctl [common_options] config revisions set --keep-config-revisions
shardmanctl [common_options] config update ip [-u ip_1=ip_2,hostname_1=hostname_2] [-y|--yes]
shardmanctl [common_options] config update fdw [-y|--yes]
shardmanctl [common_options] cluster repfactor set --value value
shardmanctl [common_options] cluster start
shardmanctl [common_options] cluster stop [-y|--yes]
shardmanctl [common_options] cluster topology [-f|--format table|json|text]
shardmanctl [common_options] forall --sql query [--sql query ...] [--twophase]
shardmanctl [common_options] getconnstr [--all]
shardmanctl [common_options] init [-y|--yes] [-f|--spec-file spec_file_name] | spec_text
shardmanctl [common_options] intcheck [-s|--system] [-c|--catalog] [-u|--user] [-o|--output] [-n|--node node]
shardmanctl [common_options] load [-b|--batch-size lines_limit] [--destination-fields fields_list] [--distributed-keys key_type_list] [-D|--delimiter character] [--null_marker string] [-e|--escape character] [-f|--file input_file] [-F|--format text|csv] [-j|--jobs task_total] [-q|--quote character] [--reject-file filename] [--schema filename] [--source file|postgres] [--source-connstr connect_string] [--source-fields fields_list] [--source-table table|view|func] [-t|--table destination_table] [-h|--help]
shardmanctl [common_options] nodes add -n|--nodes node_names [--no-rebalance]
shardmanctl [common_options] nodes start -n|--nodes node_names [--no-wait]
shardmanctl [common_options] nodes restart -n|--nodes node_names [--no-wait]
shardmanctl [common_options] nodes stop -n|--nodes node_names [--no-wait]
shardmanctl [common_options] nodes replace --old old_node --new new_node
shardmanctl [common_options] nodes rm -n|--nodes node_names
shardmanctl [common_options] probackup [init | archive-command | backup | checkdb | delete | merge | restore | set-config | show | validate] [subcommand_options]
shardmanctl [common_options] rebalance [-f|--force]
shardmanctl [common_options] recover [--info file] [--dumpfile file] [--shard shard] [--metadata-only] [--schema-only] [--timeout seconds]
shardmanctl [common_options] restart [-y|--yes] [--no-wait]
shardmanctl [common_options] set pgParam1=value1 [pgParam2=value2 [...]] [-y|--yes] [-w|--wait time_duration] [-f|--force]
shardmanctl [common_options] shard -s|--shard shard_name add -n|--node node_names
shardmanctl [common_options] shard -s|--shard shard_name master set -n|--node node_names
shardmanctl [common_options] shard -s|--shard shard_name master reset
shardmanctl [common_options] shard -s|--shard shard_name reset [-y|--yes] [--new-primary|-p]
shardmanctl [common_options] shard -s|--shard shard_name rm -n|--node node_names [-f|--force]
shardmanctl [common_options] shard -s|--shard shard_name switch [--new-primary node_names]
shardmanctl [common_options] shard -s|--shard shard_name start [--no-wait]
shardmanctl [common_options] shard -s|--shard shard_name stop
shardmanctl [common_options] shard -s|--shard shard_name replicas reinit [--no-wait] [-y|--yes] [-n|--node node_names]
shardmanctl [common_options] status [--filter all|dictionary|primary|metadata|rg|shardmand|store|topology] [-f|--format text|json] [-s|--sort node|rg|status]
shardmanctl [common_options] status transactions [-r|--repgroup replication_group_name]
shardmanctl [common_options] store dump [-f|--file filename]
shardmanctl [common_options] store get [-a|--alias cluster|ladle|repgroups|stolonspec|spec] [-k|--key keyname] [-f|--file filename]
shardmanctl [common_options] store keys
shardmanctl [common_options] store set [-a|--alias cluster|ladle|repgroups|stolonspec|spec] [-k|--key keyname] [-f|--file filename]
shardmanctl [common_options] store lock [-f|--format text|json]
shardmanctl [common_options] tables sharded info [-t|--table table]
shardmanctl [common_options] tables sharded list
shardmanctl [common_options] tables sharded norebalance
shardmanctl [common_options] tables sharded partmove [-t|--table table] [-s|--shard shard_name] [-p|--partnum partition_number]
shardmanctl [common_options] tables sharded rebalance [-t|--table table] [--skip-run-rebalance]
shardmanctl [common_options] upgrade
shardmanctl [common_options] bench init [--schema-type single|simple|shardman|custom] [-S|--schema-file file_name] [-s|--scale scale_value] [--partitions partitions_value] [-n|--no-vacuum] [-F|--fillfactor fillfactor_value]
shardmanctl [common_options] bench run [--schema-type single|simple|shardman|custom] [-f|--file file_name] [-c|--client client_value] [-C|--connect] [--full-output] [-j|--jobs jobs_value] [-s|--scale scale_factor] [-T|--time seconds] [-t|--transactions transactions_value] [-P|--progress seconds] [-R|--rate rate] [-M|--protocol querymode]
shardmanctl [common_options] bench cleanup
shardmanctl [common_options] bench generate [-c|--config config_file] [-o|--output-file file_name]
shardmanctl [common_options] script [-s|--shard shard_name] [[-f|--file file_name] | [--sql query]]
shardmanctl [common_options] psql -s|--shard shard_name
shardmanctl [common_options] daemon set [--session-log-level debug|info|warn|error] [--session-log-format text|json] [--session-log-nodes]
shardmanctl [common_options] history [-r|--reverse] [-f|--format text|json] [-l|--limit number_of_commands]

Here common_options are:

[--cluster-name cluster_name] [--log-level error|warn|info|debug] [--monitor-port port] [--retries retries_number] [--session-timeout seconds] [--store-endpoints store_endpoints] [--store-ca-file store_ca_file] [--store-cert-file store_cert_file] [--store-key client_private_key] [--store-timeout duration] [--version] [-h|--help]
Description
shardmanctl is a utility for managing a Shardman cluster.
For any command that uses the node name as an argument, the node name can be specified either by its hostname or IP address.
The backup command is used to back up a Shardman cluster. A backup consists of a directory with base backups of all replication groups and the WAL files needed for recovery. etcd metadata is saved to the etcd_dump file. The backup_info file is created during a backup and contains the backup description. For details of the backup command logic, see Cluster backup with pg_basebackup. For usage details of the command, see the section called “Backing up a Shardman Cluster”.
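For example, a typical invocation (the destination directory and task count here are illustrative) could look like:

shardmanctl backup --datadir /var/backups/shardman --maxtasks 4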
The cleanup command is used for cleanup after a failure of the nodes add command or of the shardmanctl rebalance command. Final changes to the etcd store are made at the end of the command execution, which simplifies the cleanup process. During cleanup, incomplete clover definitions and definitions of the corresponding replication groups are removed from the etcd metadata. Definitions of the corresponding foreign servers are removed from the DBMS metadata of the remaining replication groups. Since the cleanup process can be destructive, by default the tool operates in report-only mode: it only shows the actions to be done during the actual cleanup. To perform the actual cleanup, add the -p flag. For usage details of the command, see the section called “Performing Cleanup”.
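For example, you might first review the planned actions in report-only mode and then apply them:

shardmanctl cleanup --after-node-operation
shardmanctl cleanup --after-node-operation -p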
The daemon check command is used to verify that the shardmand daemon is running on the nodes specified by the --nodes option and is configured for the same cluster as shardmanctl. For usage details of the command, see the section called “Checking shardmand Service on Nodes”.
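For example (host names and the port are illustrative):

shardmanctl daemon check -n node1:15432,node2:15432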
The init command is used to register a new Shardman cluster in the etcd store or to reinitialize an existing cluster, defining a new cluster configuration and removing all data and nodes. In the init mode, shardmanctl reads the cluster specification, processes it and saves it to the etcd store as parts of two JSON documents: ClusterSpec, saved as part of shardman/cluster0/data/cluster, and LadleSpec, saved as part of shardman/cluster0/data/ladle (cluster0 is the default cluster name used by Shardman utilities). Common options related to the etcd store, such as --store-endpoints, are also saved to the etcd store and pushed down to all Shardman services started by shardmand. For the description of the Shardman initialization file format, see sdmspec.json. For usage details of the command, see the section called “Registering a Shardman Cluster”.
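A minimal registration might look like this (the etcd endpoint and the specification file name are illustrative):

shardmanctl --store-endpoints http://etcd1:2379 init -f sdmspec.json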
The config generate command is used to create a default sdmspec.json template. By default, the result is written to the standard output. To write the result to a file, use the -f filename flag. For the description of the Shardman initialization file format, see sdmspec.json.
The config verify command is used to check the correctness of an input Shardman initialization file. By default, the configuration is read from the standard input. To read the configuration from a file, use the -f filename flag. For the description of the Shardman initialization file format, see sdmspec.json.
The config get command is used to output the current full cluster specification or the configuration of the specified revision. The command takes the current cluster configuration from the cluster store. For the description of the Shardman initialization file format, see sdmspec.json.
The config update command is used to update the stolon or full Shardman configuration. The new configuration is applied to all replication groups and is saved in the shardman/cluster0/data/cluster etcd key. Note that config update can cause a DBMS restart.
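For example, to apply an updated specification from a file and wait for it to take effect (the file name and wait duration are illustrative):

shardmanctl config update -f sdmspec.json -w 10m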
The forall command is used to execute an SQL statement on all replication groups in a Shardman cluster.
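For example (the statement itself is illustrative):

shardmanctl forall --sql 'CREATE EXTENSION IF NOT EXISTS pg_stat_statements'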
The getconnstr command is used to get the libpq connection string for connecting to a cluster as administrator.
The load command is used to upload data from a text file to a distributed table or to upload a database schema from a PostgreSQL database to Shardman. When loading data from a file, the text and csv formats are supported. If a file is compressed with gzip, it will be automatically decoded while reading. To read data from stdin, specify --file=-. The data loading process can be parallelized by specifying the number of parallel workers (the -j option).
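For example, loading a CSV file into a sharded table with eight parallel workers (the table name, file name and key description are illustrative):

shardmanctl load -t orders -f orders.csv -F csv --distributed-keys 0:int8 -j 8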
The nodes add command is used to add new nodes to a Shardman cluster. With the default cross placement policy, nodes are added to a cluster by clovers. Each node in a clover runs the primary DBMS instance and possibly several replicas of other nodes in the clover. The number of replicas is determined by the Repfactor configuration parameter. So, each clover consists of Repfactor + 1 nodes and can withstand the loss of Repfactor nodes.
With the manual placement policy, each new node is added as a replication group consisting of one primary server. After adding primary nodes, you can add replicas to the new replication group by calling the shard add command.
shardmanctl performs the nodes add operation in several steps:

1. Acquires a global metadata lock.
2. For each specified node, checks that shardmand is running on it and that it sees the current cluster configuration.
3. Calculates the services to be present on each node and saves this information in etcd as part of the shardman/cluster0/data/ladle Layout object.
4. Generates the configuration for new stolon clusters (also called replication groups) and initializes them.
5. Registers the added replication groups in the shardman/cluster0/data/ladle etcd key.
6. Waits for shardmand to start all the necessary services, checks that the new replication groups are accessible and have the correct configuration.
7. Creates an auxiliary broadcaster that holds locks on each existing replication group in the cluster.
8. For each new replication group, copies all schemas and shardman schema data from a randomly selected existing replication group to the new one, ensures that the Shardman extension is installed on the new replication group, and recalculates OIDs used in the extension configuration tables.
9. On each existing replication group, defines foreign servers referencing the new replication group and recreates definitions of foreign servers on the new replication group.
10. Recreates all partitions of sharded tables as foreign tables referencing data from old replication groups and has the changes registered in the etcd storage.
11. For each new replication group, copies the global table data from existing replication groups to the new one.
12. Rebalances partitions of sharded tables. The rebalancing process for each sharded table iteratively determines the replication groups with the maximum and minimum number of partitions and creates a task to move one partition to the replication group with the minimum number of partitions. This process is repeated while max - min > 1. Partitions are moved using logical replication. Partitions of colocated tables are moved together with partitions of the distributed tables to which they refer. You can skip this step using the --no-rebalance option.

For usage details of the command, see the section called “Adding Nodes to a Shardman Cluster”.
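For example, to add two nodes and postpone rebalancing (host names are illustrative):

shardmanctl nodes add -n node5,node6 --no-rebalance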
The nodes rm command is used to remove nodes from a Shardman cluster. In the manual-topology mode, this command only removes the specified nodes from the cluster, and if a node is the last one in its replication group, the entire group is removed. In the cross-replication mode, this command removes the clovers containing the specified nodes from the cluster. The last clover in the cluster cannot be removed. Any data (such as partitions of sharded relations) on removed replication groups is migrated to the remaining replication groups using logical replication, and all references to the removed replication groups (including definitions of foreign servers) are removed from the metadata of the remaining replication groups. Finally, the metadata in etcd is updated. For usage details of the command, see the section called “Removing Nodes from a Shardman cluster”.
The probackup command is used to back up and restore the Shardman cluster using the pg_probackup backup utility. For details of the probackup command logic, see Backup and Recovery of Shardman Backups using pg_probackup. For usage details of the command, see the section called “probackup”.
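For example, a full backup to a repository mounted on all nodes (the paths are illustrative) might be taken as:

shardmanctl probackup backup -B /mnt/backups -E /mnt/backups/etcd -b FULL --storage-type mount -j 4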
The rebalance command is used to evenly rebalance sharded tables in a cluster. This can be useful, for example, if you did not perform a rebalance when adding nodes to the cluster. If the --force option is not provided, tables with manually moved partitions are skipped.
The cleanup command with the --after-rebalance flag is used to perform cleanup after a failure of the rebalance command. On each node, it cleans up subscriptions and publications left over from the rebalance command and drops tables that store data of partially-transferred partitions of sharded tables.
The cluster repfactor set command is used to set the value of the replication factor for the Shardman cluster. This command can only be used in the manual topology cluster mode. The new replication factor value is passed via the --value command-line flag.
The cluster start command is used to start all PostgreSQL instances previously stopped with the cluster stop command. For the command to work, shardmand must be running.
The cluster stop command is used to stop all PostgreSQL instances of the Shardman cluster. At the same time, the shardmand daemons continue to work.
The cluster topology command is used to visualize the topology of a cluster. By default, the topology is returned in a table view. If you want a JSON or text representation, use the --format json|text flag.
The recover command is used to restore a Shardman cluster from a backup created by the backup command. For details of the recover command logic, see Cluster recovery from a backup using pg_basebackup. For usage details of the command, see the section called “Restoring a Shardman Cluster”.
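For example, a full restore driven by the backup description file (the path is illustrative):

shardmanctl recover --info /var/backups/shardman/backup_info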
The restart command is used to restart a Shardman cluster, including all shardmand instances. If PostgreSQL instances were previously stopped using the cluster stop command, they will be started. The command returns control after all primary nodes in the cluster have been restarted.
The set command is used to set one or more parameters for DBMS instances of the Shardman cluster. Parameters are passed as command-line arguments, each of the form param=value. The command is effectively an alternative to shardmanctl config update -p for updating database settings.
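For example (the parameter values are illustrative):

shardmanctl set shared_buffers=4GB work_mem=64MB -y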
The status command is used to display the health status of Shardman cluster subsystems. It can show the status of several components: store, metadata, shardmand, replication groups, primary nodes and dictionary. If only some subsystems are of interest, the --filter option may be used. Also, status supports sorting its messages by status, node or replication group and printing the result to stdout as a table (table), text (text) or JSON (json), with table as the default. For usage details of the command, see the section called “Getting the Status of Cluster Subsystems”.
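For example, to check only the replication groups, sorted by node:

shardmanctl status --filter rg --sort node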
The store dump command gets all the keys and their values from the etcd store and outputs them into the file given by --file, where the - value means stdout (the default). It is intended for debugging, so some harmless errors may be produced during execution, yet all the available information will be dumped. Only keys for the current cluster (with the current cluster prefix, like shardman/cluster0/) are dumped. For usage details of the command, see the section called “Dumping All Keys from the Store to Debug Error Configuration”.
The store get command gets a particular value from the store by its key name. The value is expected to be JSON, so if it is not (which is not prohibited), some harmless errors may be produced. The key to retrieve from the store can be specified with the --key option; several keys have aliases, short names for easy use. To get a key by its alias, use the --alias option with one of the available aliases (use --help or the examples below for reference). The aliases stolonspec and spec can also be used to manipulate the initial cluster and stolon configuration explicitly, without retrieving it from the full cluster specification. It is recommended to use existing aliases instead of full key names, since alias processing performs some additional checks, which help to achieve safer results. By default, a key is printed to stdout (explicitly, with the --file=- option), but it can be output to any desired file. For usage details of the command, see the section called “Getting the Current stolon Specification”.
The store keys command shows all the keys in the store for the current cluster (with the cluster prefix) and their aliases. The aliases stolonspec and spec are not shown since they are parts of other keys. For usage details of the command, see the section called “Getting the Cluster and Ladle Key Names For the Current Cluster”.
The store set command creates or rewrites one particular key in the store. The value is not required to be JSON for an arbitrary key, but if it is one of the keys that have aliases with a known mapping (like ladle or cluster), the command will not accept incorrect JSON structures. Just like the store get command, store set accepts a key name via the --key or --alias option and the input source file as --file (stdin is specified with the - value). For usage details of the command, see the section called “Setting a New Spec for the Cluster”.
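For example, to load an edited stolon specification back into the store (the file name is illustrative):

shardmanctl store set --alias stolonspec -f stolon.json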
The store lock command shows the current cluster meta lock information. If the lock does not exist, it returns Lock not found. It displays the cluster ID, the command that acquired the lock, the host name and the lock time. You can specify --format to output in json format or in text format (the default). For usage details of the command, see the section called “Output Current Cluster Meta Lock Information”.
The upgrade command is used to update the version of the shardman PostgreSQL extension on all cluster nodes. Before upgrading extensions, you need to install the new packages and run the restart command. As a result of upgrade, the utilities will upgrade shardman and all the other extensions on the server.
Sometimes, after running the upgrade command or after some manual user manipulations, dictionary errors may appear in the output of the status command. One of the reasons for these errors is that the value of the srvoptions field of the pg_foreign_server table differs from what the system expects. To solve this specific issue, use the config update fdw command, which will return srvoptions to the expected state.
Note
Most of the described shardmanctl commands take a global metadata lock.
Command-line Reference
This section describes shardmanctl commands. For Shardman common options used by the commands, see the section called “Common Options”.
backup
Syntax:
shardmanctl [common_options] backup --datadir directory [--maxtasks number_of_tasks] [--use-ssh]

Backs up a Shardman cluster.

--datadir directory
Required. Specifies the directory to write the output to. If the directory exists, it must be empty. If it does not exist, shardmanctl creates it (but not parent directories).

--maxtasks number_of_tasks
Specifies the maximum number of concurrent tasks (pg_probackup commands) to run. Default: number of logical CPUs of the system.

--use-ssh
If specified, the shardmanctl recover command will use the scp command to restore data. This allows using a backup repository on the local host.

For more details, see the section called “Backing up a Shardman Cluster”.
cleanup
Syntax:
shardmanctl [common_options] cleanup [-p|--processrepgroups] --after-node-operation|--after-rebalance

Performs cleanup after the nodes add or rebalance command.

-p | --processrepgroups
Perform an actual cleanup. By default, the tool only shows the actions to be done during the actual cleanup. For more details, see the section called “Performing Cleanup”.

--after-node-operation
Perform cleanup after a failure of a nodes add command.

--after-rebalance
Perform cleanup after a failure of a rebalance command.
config update credentials
Syntax:
shardmanctl [common_options] config update credentials [-u|--user] [-p|--password] [-k|--ssl-key] [-c|--ssl-cert] [-w|--wait time_duration] [--force] [-y|--yes]

Updates the password or certificate/key of a user to connect to a Shardman cluster. It only updates the parameters of the authentication type that was specified by the user (scram-sha-256, ssl), not the type itself.

-u | --user
User that requires an update of the authentication parameters.

-p | --password
New password.

-k | --ssl-key
New SSL key.

-c | --ssl-cert
New SSL certificate.

-w | --wait
Sets shardmanctl to wait for configuration changes to take effect. If a new configuration cannot be loaded by all replication groups, shardmanctl will wait forever.

--force
Perform a forced update if a cluster operation is in progress.

-y | --yes
Confirm the operation instead of asking approval from the standard input.
cluster repfactor set
Syntax:
shardmanctl [common_options] cluster repfactor set --value new_repfactor

Sets the replication factor for the manual-topology mode.

--value=new_repfactor
New replication factor value.
cluster start
Syntax:
shardmanctl [common_options] cluster start
Starts all PostgreSQL server instances.
cluster stop
Syntax:
shardmanctl [common_options] cluster stop [-y|--yes]

Stops all PostgreSQL server instances.

-y | --yes
Confirm the operation instead of asking approval from the standard input.
cluster topology
Syntax:
shardmanctl [common_options] cluster topology [-f|--format table|json|text]

Displays the cluster topology.

-f | --format=table|json|text
Output format. For more details, see the section called “Displaying the Cluster Topology”.
daemon check
Syntax:
shardmanctl [common_options] daemon check -n|--nodes node_name:port

Checks shardmand on nodes.

-n | --nodes=node_name:port
List of nodes to check shardmand on. For more details, see the section called “Checking shardmand Service on Nodes”.
forall
Syntax:
shardmanctl [common_options] forall --sql query [--sql query [--sql query ...]] [--twophase]
Executes an SQL statement on all replication groups in a Shardman cluster.
getconnstr
Syntax:
shardmanctl [common_options] getconnstr [--all]

Gets the libpq connection string for connecting to a cluster as administrator.

--all
Adds replicas to the connection string.
init
Syntax:
shardmanctl [common_options] init [-y|--yes] [-f|--spec-file spec_file_name] | spec_text

Registers a new Shardman cluster in the etcd store or reinitializes the existing cluster, defining a new cluster configuration and removing all data and nodes.

-f | --spec-file=spec_file_name
Specifies the file with the cluster specification string. The value of - means the standard input. By default, the string is passed in spec_text. For usage details, see the section called “Registering a Shardman Cluster”.

-y | --yes
Confirm the operation instead of asking approval from the standard input.
intcheck
Syntax:
shardmanctl [common_options] intcheck [-s|--system] [-u|--user] [-c|--catalog] [-o|--output] [-n|--node node]

Runs pg_integrity_check on all nodes of a Shardman cluster or on a single selected node.

-s | --system
Validate checksums for read-only files. Checksums for read-only files control both file contents and file attributes.

-u | --user
Validate checksums for additional files. Checksums for additional files control both file contents and file attributes.

-c | --catalog
Validate checksums for system catalog tables. For the -c option to work correctly, the database server must be started and accept connections.

-o | --output
Recalculate checksums and write them into a file.

-n | --node=node_names
Only execute the pg_integrity_check command on the selected node.
load
Syntax:
shardmanctl [common_options] load [-b|--batch-size lines_limit] [--destination-fields fields_list] [--distributed-keys key_type_list] [-D|--delimiter character] [--null_marker string] [-e|--escape character] [-f|--file input_file] [-F|--format text|csv] [-j|--jobs task_total] [-q|--quote character] [--reject-file filename] [--schema filename] [--source file|postgres] [--source-connstr connect_string] [--source-fields fields_list] [--source-table source_table] [-t|--table destination_table]

Loads data to a Shardman cluster.

-b | --batch-size=lines_limit
Number of rows per batch to write to the Shardman cluster. Default: 1000.

--destination-fields=fields_list
Comma-separated list of target table fields. If the value is not set, all fields of the table are used in the order they are declared.

--distributed-keys=key_type_list
Comma-separated list of pairs. Each pair consists of a field number (starting with zero) and a type, separated by a colon. The following types are supported: bool, char, float4, float8, int2, int4, int8, name, text, varchar and uuid.

-D | --delimiter=character
Specifies the character that separates columns within each row (line) of the file. This must be a single one-byte character. Default: tab for text format, comma for CSV format.

--null_marker=string
Specifies the string that represents a null value. Default: \N for text format, unquoted empty string for CSV format.

-e | --escape=character
Specifies the character that should appear before a data character that matches the QUOTE value. The default is the same as the QUOTE value (so that the quoting character is doubled if it appears in the data). This must be a single one-byte character. This option is allowed only when using CSV format.

-f | --file=filename
Input data filename (or - for stdin).

-F | --format=text|csv
Input data format. Possible values are text and csv. Default: text.

-j | --jobs=number
Number of parallel processes to load data. Default: number of replication groups.

-q | --quote=character
Specifies the quoting character to be used when a data value is quoted. The default is double-quote. This must be a single one-byte character. This option is allowed only when using CSV format.

--reject-file=filename
All data batches with errors during upload will be written to this file. If the value is not set, such batches will be skipped.

--schema=filename
The schema that defines the rules for transferring data from PostgreSQL to Shardman. If this option is set, all other options are not used.

--source=file|postgres
Data source type: file or postgres. Default: file.

--source-connstr=string
Data source database connection string.

--source-fields=fields_list
Comma-separated list of source table fields. If the value is not set, all fields of the table are used in the order they are declared.

--source-table=table
Source table, view or function (funcname(param1,...,paramN)).

-t | --table=table
Destination table.
nodes add
Syntax:
shardmanctl [common_options] nodes add -n|--nodes node_names [--no-rebalance]

Adds nodes to a Shardman cluster.

-n | --nodes=node_names
Required. Specifies the comma-separated list of nodes to be added.

--no-rebalance
Skip the step of rebalancing partitions of sharded tables. For more details, see the section called “Adding Nodes to a Shardman Cluster”.
nodes rm
Syntax:
shardmanctl [common_options] nodes rm -n|--nodes node_names

Removes nodes from a Shardman cluster.

-n | --nodes=node_names
Specifies the comma-separated list of nodes to be removed. For usage details, see the section called “Removing Nodes from a Shardman cluster”.
probackup
Syntax:
shardmanctl [common_options] probackup [init|archive-command|backup|checkdb|delete|merge|restore|set-config|show|validate] [--log-to-console] [--help] [subcommand_options]

Creates a backup of a Shardman cluster and restores the Shardman cluster from a backup using pg_probackup.

List of subcommands:

init
Initializes a new repository folder for the Shardman cluster backup and creates a configuration file on all nodes for connection to the backup storage if --storage-type is S3.

archive-command
Adds archive_command to each replication group (or to a single one if the --shard option is specified) and enables or disables it in the Shardman cluster.

backup
Creates a backup of the Shardman cluster.

checkdb
Verifies the Shardman cluster correctness by detecting physical and logical corruption.

delete
Deletes a backup with a specified ID and the archived WAL files that are no longer in use.

merge
Merges the backups that belong to a common incremental backup chain. The full backup merges the backups with their first incremental backup. The incremental backup merges the backups with their parent full backup, along with all the incremental backups between them. Once the merge is complete, the full backup covers all the merged data, and the incremental backups are removed as redundant. In this version, you cannot run the merge command using the S3 interface.

restore
Restores the Shardman cluster from the selected backup.

show
Shows the list of backups of the Shardman cluster.

validate
Checks the selected Shardman cluster backup for integrity.

set-config
Adds the specified settings to pg_probackup.conf or modifies those previously added.

The following options can be used with all probackup subcommands:

--log-to-console
Outputs a full probackup log to the console. By default, for each replication group the probackup log file is written to the backup directory (see --backup-path below) as the <backup-directory>/backup/log/pg_probackup-<repgroup-name>.log file. The log rotation file size is 20MB. If this value is reached, the log file is rotated once a shardmanctl probackup validate or shardmanctl probackup backup command is launched.

--help
Shows subcommand help.
init
Syntax:
shardmanctl [common_options] probackup init -B|--backup-path path -E|--etcd-path path [--remote-port port] [--remote-user username] [--ssh-key path] [-t|--timeout seconds] [-m|--maxtasks number_of_tasks] [--storage-type mount|remote|S3] [--s3-config-only] [--s3-config-path path] [--s3-host S3_host] [--s3-port S3_port] [--s3-access-key S3_access_key] [--s3-secret-key S3_secret_key] [--s3-bucket S3_bucket] [--s3-region S3_region] [--s3-buffer-size size] [--s3-retries number_of_retries] [--s3-timeout time] [--s3-https] [-y|--yes]

Initializes a new repository folder for the Shardman cluster backup.

-B | --backup-path path
Required if --s3-config-only is not used. Specifies the path to the backup catalog where Shardman cluster backups should be stored.

-E | --etcd-path path
Required if --s3-config-only is not used. Specifies the path to the catalog where the etcd dumps should be stored.

--remote-port port
Specifies the remote ssh port for replication group instances. Default: 22.

--remote-user username
Specifies the remote ssh user for replication group instances. Default: postgres.

--ssh-key path
Specifies the ssh private key for execution of remote ssh commands. Default: $HOME/.ssh/id_rsa.

--storage-type mount|remote|S3
Type of the backup storage. If the value is remote, SSH is used to copy data files to the remote backup directory. But this behavior is different if a directory mounted to all nodes or an S3-compatible object storage is used to store backups. To specify these kinds of storage, the value of the --storage-type option is set to mount or S3, respectively. Default: remote.

--s3-config-path path
Specifies the path where the S3 configuration file will be created on all Shardman nodes. Default: <shardman-data-dir>/s3.config.

--s3-config-only
Create only S3 configuration files on all nodes and skip backup repository initialization. This flag is useful if the value of --storage-type is S3.

--s3-host host
Specifies the S3 host to connect to the S3-compatible storage.

--s3-port port
Specifies the S3 port to connect to the S3-compatible storage.

--s3-access-key access-key
Specifies the S3 access key to connect to the S3-compatible storage.

--s3-secret-key secret-key
Specifies the S3 secret key to connect to the S3-compatible storage.

--s3-bucket bucket
Specifies the bucket in the S3-compatible object storage for storing backups.

--s3-region region
Specifies the region in the S3-compatible object storage.

--s3-buffer-size size
Size of the read/write buffer for pg_probackup to communicate with the S3-compatible object storage, in MiB. Default: 16.

--s3-retries number_of_retries
Maximum number of attempts for pg_probackup to execute an S3 request in case of failures. Default: 5.

--s3-timeout time
Maximum allowable amount of time for pg_probackup to transfer data of size --s3-buffer-size to/from the S3-compatible object storage, in seconds. Default: 300.

--s3-https
Specifies the HTTPS URL to connect to the S3-compatible object storage.

-y | --yes
Approve the operation regardless of whether the file specified in --s3-config-path exists.
archive-command
Syntax:
shardmanctl [common_options] probackup archive-command [add|rm] -B|--backup-path path [-j|--jobs count] [--compress] [--compress-algorithm algorithm] [--compress-level level] [--batch-size batch_size] [--storage-type mount|remote|S3] [--remote-port port] [--remote-user username] [-s|--shard shard-name] [--s3-config-path path]

Adds/removes and enables/disables the archive command for every replication group in the Shardman cluster to put WAL logs into the initialized backup repository.

add
Adds and enables the archive command for every replication group in the Shardman cluster.

rm
Disables the archive command in every replication group in the Shardman cluster. No additional options are required.

-B | --backup-path path
Required when adding archive_command. Specifies the path to the backup catalog where the Shardman cluster backups should be stored.

--batch-size batch_size
To speed up the archiving, specify the --batch-size option to copy the WAL segments in batches of a specified size. If the --batch-size option is used, it is also possible to specify the -j option to copy a batch of the WAL segments on multiple threads.

-j | --jobs count
The number of parallel threads that pg_probackup uses when creating a backup. Default: 1.

--compress
Enables backup compression. If this flag is not specified, compression is disabled. If the flag is specified, the default zstd algorithm is used with the compression level set to 1, while other compression options are ignored even if they are specified.

--compress-algorithm algorithm
Defines the compression algorithm: zlib, lz4, zstd, pglz, or none. Once defined, it checks if the values are valid within the scale of the defined algorithm. The supported compression algorithms depend on the version of Postgres Pro Enterprise that includes the pg_probackup used, as explained in Compression Options. Default: none.

--compress-level level
Defines the compression level: 0-9 for zlib, 1 for pglz, 0-22 for zstd, and 0-12 for lz4. Default: 1.

--storage-type mount|remote|S3
Type of the backup storage. If the value is remote, SSH is used to copy data files to the remote backup directory. But this behavior is different if a directory mounted to all nodes or an S3-compatible object storage is used to store backups. To specify these kinds of storage, the value of the --storage-type option is set to mount or S3, respectively. Default: remote.

--remote-port port
Specifies the remote ssh port for replication group instances. Default: 22.

--remote-user username
Specifies the remote ssh user for replication group instances. Default: postgres.

-s | --shard shard-name
Specifies the name of the shard where the archive command must be added, enabled or disabled. If not specified, the archive command is enabled or disabled for every shard.

--s3-config-path path
Specifies the path to the S3 configuration file. Default: <shardman-data-dir>/s3.config.
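For example, to enable WAL archiving into a mounted repository (the path and batch settings are illustrative):

shardmanctl probackup archive-command add -B /mnt/backups --storage-type mount --batch-size 10 -j 2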
backup
Syntax:
shardmanctl [common_options] probackup backup -B|--backup-path path -E|--etcd-path path -b|--backup-mode MODE [-j|--jobs count] [--compress] [--compress-algorithm algorithm] [--compress-level level] [--batch-size batch_size] [--storage-type mount|remote|S3] [--remote-port port] [--remote-user username] [--ssh-key path] [-t|--timeout seconds] [-m|--maxtasks number_of_tasks] [--log-directory path] [--s3-config-path path] [--no-validate] [--skip-block-validation] [--log-to-console]

Creates a backup of the Shardman cluster.

-B | --backup-path path
Required. Specifies the path to the backup catalog where Shardman cluster backups should be stored.

-E | --etcd-path path
Required. Specifies the path to the catalog where the etcd dumps should be stored.

-b | --backup-mode MODE
Required. Defines the backup mode: FULL, PAGE, DELTA, PTRACK.

--batch-size batch_size
To speed up the archiving, specify the --batch-size option to copy the WAL segments in batches of a specified size. If the --batch-size option is used, it is also possible to specify the -j option to copy a batch of the WAL segments on multiple threads.

-j | --jobs count
The number of parallel threads that pg_probackup uses when creating a backup. Default: 1.

--compress
Enables backup compression. If this flag is not specified, compression is disabled. If the flag is specified, the default zstd algorithm is used with the compression level set to 1, while other compression options are ignored even if they are specified.

--compress-algorithm algorithm
Defines the compression algorithm: zlib, lz4, zstd, pglz, or none. The supported compression algorithms depend on the version of Postgres Pro Enterprise that includes the pg_probackup used, as explained in Compression Options. Default: none.

--compress-level level
Defines the compression level: 0-9 for zlib, 1 for pglz, 0-22 for zstd, and 0-12 for lz4. Default: 1.

--remote-port port
Specifies the remote ssh port for replication group instances. Default: 22.

--remote-user username
Specifies the remote ssh user for replication group instances. Default: postgres.

--ssh-key path
Specifies the ssh private key for execution of remote ssh commands. Default: $HOME/.ssh/id_rsa.

-t | --timeout seconds
Exit with error after waiting until the cluster is ready for the specified number of seconds.

-m | --maxtasks number_of_tasks
Specifies the maximum number of concurrent tasks (pg_probackup commands) to run. Default: number of logical CPUs of the system.

--no-validate
Skip automatic validation after the backup is taken. You can use this flag if you validate backups regularly and would like to save time when running backup operations. Default: false.

--skip-block-validation
Disables block-level checksum verification to speed up the backup process. Default: false.

--storage-type mount|remote|S3
Type of the backup storage. If the value is remote, SSH is used to copy data files to the remote backup directory. But this behavior is different if a directory mounted to all nodes or an S3-compatible object storage is used to store backups. To specify these kinds of storage, the value of the --storage-type option is set to mount or S3, respectively. Default: remote.

--log-to-console
Enables output of the pg_probackup logs to the console. Default: false.

--log-directory path
Specifies the directory for pg_probackup logs. Required if --storage-type is set to S3 unless the SDM_LOG_DIRECTORY environment variable is set. Default: <backup-directory>/backup/log.

--s3-config-path path
Specifies the path to the S3 configuration file. Default: <shardman-data-dir>/s3.config.
checkdb
Syntax:
shardmanctl [common_options] probackup checkdb [--amcheck [--skip-block-validation] [--heapallindexed]] [--shard shard] [-m|--maxtasks number_of_tasks]

Verifies the Shardman cluster correctness by detecting physical and logical corruption.

--amcheck
Performs logical verification of indexes if no corruption was found while checking data files. You must have the amcheck extension or the amcheck_next extension installed in the database to check its indexes. For databases without amcheck, index verification will be skipped. The amcheck extension is included with the Shardman package.

--heapallindexed
Checks that all heap tuples that should be indexed are actually indexed. You can use this flag only together with the --amcheck flag. This option is effective depending on the version of amcheck/amcheck_next installed. The amcheck extension included in the Shardman package supports this verification.

--skip-block-validation
Skip validation of data files. You can use this flag only together with the --amcheck flag, so that only logical verification of indexes is performed.

--shard shard
Perform the verification only on the specified shard. By default, the verification is performed on all shards.

-m | --maxtasks number_of_tasks
Specifies the maximum number of concurrent tasks (pg_probackup commands) to run. Default: number of logical CPUs of the system.
delete
Syntax:
shardmanctl [common_options] probackup delete -B|--backup-path path -i|--backup-id backup_id [-j|--jobs count] [-m|--maxtasks number_of_tasks] [--storage-type mount|remote|S3] [--s3-config-path path] [--delete-wal] [-y|--yes]

Deletes a backup of the Shardman cluster with the specified backup_id.

-B | --backup-path path
Required. Specifies the path to the backup catalog (or key in the bucket of the S3-compatible storage) where Shardman cluster backups should be stored.

-i | --backup-id backup_id
Required. Specifies the unique identifier of the backup.

-j | --jobs count
The number of parallel threads that pg_probackup uses when creating a backup. Default: 1.

-m | --maxtasks number_of_tasks
Specifies the maximum number of concurrent tasks (pg_probackup commands) to run. Default: number of logical CPUs of the system.

--storage-type mount|remote|S3
Type of the backup storage. If the value is remote, SSH is used to copy data files to the remote backup directory. But this behavior is different if a directory mounted to all nodes or an S3-compatible object storage is used to store backups. To specify these kinds of storage, the value of the --storage-type option is set to mount or S3, respectively. To delete a backup that was created with --storage-type set to S3, set the --storage-type option to S3 in the delete command as well. Default: remote.

--s3-config-path path
Specifies the path to the S3 configuration file. Default: <shardman-data-dir>/s3.config.

--delete-wal
Deletes WAL files that are no longer required to restore the cluster from any of the existing backups. Default: false.

-y | --yes
Approve the operation. Default: false.
merge
Syntax:
shardmanctl [common_options] probackup merge -B|--backup-path path -i|--backup-id backup_id [-j|--jobs count] [-m|--maxtasks number_of_tasks] [--no-validate] [--no-sync] [-y|--yes]

Merges the backups that belong to a common incremental backup chain. The full backup merges the backups with their first incremental backup. The incremental backup merges the backups with their parent full backup, along with all the incremental backups between them. Once the merge is complete, the full backup covers all the merged data, and the incremental backups are removed as redundant.

-B | --backup-path path
Required. Specifies the path to the backup catalog where Shardman cluster backups should be stored.

-i | --backup-id backup_id
Required. Specifies the unique identifier of the backup.

-j | --jobs count
The number of parallel threads that pg_probackup uses when creating a backup. Default: 1.

-m | --maxtasks number_of_tasks
Specifies the maximum number of concurrent tasks (pg_probackup commands) to run. Default: number of logical CPUs of the system.

--no-sync
Do not sync merged files to disk. You can use this flag to speed up the merge process, but it can result in data corruption in case of an operating system or hardware crash. Default: false.

--no-validate
Skip automatic validation before and after merge. Default: false.

-y | --yes
Approve the operation. Default: false.
restore
Syntax:
shardmanctl [common_options] probackup restore -B|--backup-path path -i|--backup-id id -j|--jobs count [--recovery-target-time timestamp] [-I|--recovery-mode incremental_mode] [-t|--timeout seconds] [-m|--maxtasks number_of_tasks] [--metadata-only] [--schema-only] [--shard shard] [--no-validate] [--skip-block-validation] [--s3-config-path path] [--storage-type mount|remote|S3] [--wal-limit number_of_wal_segments] [--log-directory path]

Restores a Shardman cluster from the selected backup.

-B | --backup-path path
Required. Specifies the path to the backup catalog where Shardman cluster backups should be stored.

-i | --backup-id id
Required. Specifies the backup ID for restore.

-j | --jobs count
The number of parallel threads that pg_probackup uses when restoring from a backup. Default: 1.

--recovery-target-time timestamp
Point-in-Time Recovery (PITR) option. Specifies the timestamp for restore. Example: '2024-01-25 15:30:36' in UTC.

-I | --recovery-mode incremental_mode
Specifies the incremental restore mode to be used. Possible values are:
checksum: replace only pages with mismatched checksum and LSN.
lsn: replace only pages with LSN greater than the point of divergence.
none: regular restore, the default.

-t | --timeout seconds
Exit with error after waiting until the cluster is ready or the recovery is complete for the specified number of seconds.

-m | --maxtasks number_of_tasks
Specifies the maximum number of concurrent tasks (pg_probackup commands) to run. Default: number of logical CPUs of the system.

--metadata-only
Perform metadata-only restore. By default, full restore is performed.

--schema-only
Perform schema-only restore. By default, full restore is performed.

--shard shard
Perform restoring only on the specified shard. By default, restoring is performed on all shards.

--no-validate
Skip backup validation. You can use this flag if you validate backups regularly and would like to save time when running restore operations. Default: false.

--skip-block-validation
Disable block-level checksum verification to speed up validation. During automatic validation before the restore, only file-level checksums will be verified. Default: false.

--s3-config-path path
Specifies the path to the S3 configuration file. Default: <shardman-data-dir>/s3.config.

--storage-type mount|remote|S3
Type of the backup storage. If the value is remote, SSH is used to copy data files to the remote backup directory. But this behavior is different if a directory mounted to all nodes or an S3-compatible object storage is used to store backups. To specify these kinds of storage, the value of the --storage-type option is set to mount or S3, respectively. When the backup was created with --storage-type set to S3, set the --storage-type option to S3 in the restore command as well. Default: remote.

--wal-limit number_of_wal_segments
Specifies the number of WAL segments in which the closest synchronization points will be searched in the case of PITR. Default: 0 (no limit).

--log-directory path
Specifies the directory for pg_probackup logs. Required if --storage-type is set to S3 unless the SDM_LOG_DIRECTORY environment variable is set. Default: <backup-directory>/backup/log.
show
Syntax:
shardmanctl [common_options] probackup show -B|--backup-path path [-f|--format table|json] [--archive] [-i|--backup-id backup-id] [--instance instance] [--storage-type mount|remote|S3] [--s3-config-path path]

Shows the list of backups of the Shardman cluster.

-B | --backup-path path
Required. Specifies the path to the backup catalog where Shardman cluster backups should be stored.

-f | --format table|json
Specifies the output format. Default: table.

--archive
Shows the WAL archive information.

-i | --backup-id backup-id
Shows information about the specific backup.

--instance instance
Shows information about the specific instance.

--s3-config-path path
Specifies the path to the S3 configuration file. Default: <shardman-data-dir>/s3.config.

--storage-type mount|remote|S3
Type of the backup storage. If the value is remote, SSH is used to copy data files to the remote backup directory. But this behavior is different if a directory mounted to all nodes or an S3-compatible object storage is used to store backups. To specify these kinds of storage, the value of the --storage-type option is set to mount or S3, respectively. To show a backup that was created with the S3 value of --storage-type, set --storage-type to S3 in the show command. Default: remote.
validate
Syntax:
shardmanctl [common_options] probackup validate -B|--backup-path path -i|--backup-id id [-t|--timeout seconds] [-m|--maxtasks number_of_tasks] [--log-to-console] [--storage-type mount|remote|S3] [--s3-config-path path] [--log-directory path]

Checks the selected Shardman cluster backup for integrity.

-B | --backup-path path
Required. Specifies the path to the backup catalog where Shardman cluster backups should be stored.

-i | --backup-id id
Required. Specifies the backup ID for validation.

--log-to-console
Enables output of pg_probackup logs to the console. Default: false.

-t | --timeout seconds
Exit with error after waiting until the cluster is ready for the specified number of seconds.

-m | --maxtasks number_of_tasks
Specifies the maximum number of concurrent tasks (pg_probackup commands) to run. Default: number of logical CPUs of the system.

--s3-config-path path
Specifies the path to the S3 configuration file. Default: <shardman-data-dir>/s3.config.

--storage-type mount|remote|S3
Type of the backup storage. If the value is remote, SSH is used to copy data files to the remote backup directory. But this behavior is different if a directory mounted to all nodes or an S3-compatible object storage is used to store backups. To specify these kinds of storage, the value of the --storage-type option is set to mount or S3, respectively. To validate a backup that was created with the S3 value of --storage-type, set --storage-type to S3 in the validate command. Default: remote.

--log-directory path
Specifies the directory for pg_probackup logs. Required if --storage-type is set to S3 unless the SDM_LOG_DIRECTORY environment variable is set. Default: <backup-directory>/backup/log.
set-config
Syntax:
shardmanctl [common_options] probackup set-config [--archive-timeout int] [-B|--backup-path string] [-m|--maxtasks int] [--remote-port int] [--remote-user string] [--retention-redundancy int] [--retention-window int] [--wal-depth int] [--s3-config-path string] [-s|--shard string] [--storage-type string]

Adds the specified settings to pg_probackup.conf or modifies those previously added.

--archive-timeout int
Sets a timeout for the WAL segment archiving and streaming, in seconds. Default: pg_probackup waits for 300 seconds.

-B | --backup-path=string
Specifies the absolute path to the backup catalog. Default: current value in pg_probackup.conf.

-m | --maxtasks=int
Specifies the maximum number of concurrent tasks (pg_probackup commands) to run. Default: number of logical CPUs of the system.

--remote-port int
An SSH remote backup port. Default: current value in pg_probackup.conf.

--remote-user string
An SSH remote backup user. Default: current value in pg_probackup.conf.

--retention-redundancy int
Specifies the number of full backup copies to store in the data directory. It must be set to a non-negative integer. The zero value disables this setting. Default: current value in pg_probackup.conf.

--retention-window int
A number of days of recoverability. It must be set to a non-negative integer. The zero value disables this setting. Default: current value in pg_probackup.conf.

--wal-depth int
A number of the latest valid backups on every timeline that must retain the ability to perform PITR. Must be set to a non-negative integer. The zero value disables this setting. Default: current value in pg_probackup.conf.

--s3-config-path string
A path to the S3 configuration file. Default: /var/lib/pgpro/sdm-14/data/s3.config.

-s | --shard=string
A name of the shard to run the set-config command for. If not specified, the command is run for all the shards. Default: current value in pg_probackup.conf.

--storage-type string
A backup storage type; the possible values are remote, mount, S3. Default: remote.
rebalance
Syntax:
shardmanctl [common_options] rebalance [-f|--force]

Rebalances sharded tables.

-f | --force
Perform forced rebalance of sharded tables whose partitions were manually moved.
recover
Syntax:
shardmanctl [common_options] recover [--info file] [--dumpfile file] [--shard shard] [--metadata-only] [--schema-only] [--timeout seconds]

Restores a Shardman cluster from a backup created by the backup command.

--dumpfile file
Required for metadata-only restore. Specifies the file to load the etcd metadata dump from.

--info file
Required for full restore. Specifies the file to load information about the backup from.

--shard shard
Perform restoring only on the specified shard. By default, restoring is performed on all shards.

--metadata-only
Perform metadata-only restore. By default, full restore is performed.

--schema-only
Perform schema-only restore. By default, full restore is performed.

--timeout seconds
Exit with error after waiting until the cluster is ready or the recovery is complete for the specified number of seconds.

For more details, see the section called “Restoring a Shardman Cluster”.
restart
Syntax:
shardmanctl [common_options] restart [-y|--yes] [--no-wait]
Restarts a Shardman cluster.
shard add
Syntax:
shardmanctl [common_options] shard -s|--shard shard_name add -n|--nodes node_names [--no-wait]
Adds a replica to a shard.
shard master set
Syntax:
shardmanctl [common_options] shard -s|--shard shard_name master set -n|--node node_names
Sets the precedence for a certain primary server for a specified shard.
shard master reset
Syntax:
shardmanctl [common_options] shard -s|--shard shard_name master reset
Resets the primary precedence settings for the shard.
shard reset
Syntax:
shardmanctl [common_options] shard -s|--shard shard_name reset [--yes | -y] [--new-primary | -p]
Resets nodes of a replication group if they are in a hung state.
shard rm
Syntax:
shardmanctl [common_options] shard -s|--shard shard_name rm -n|--nodes node_names [-f|--force]
Removes a replica from a shard.
shard switch
Syntax:
shardmanctl [common_options] shard -s|--shard shard_name switch [--new-primary node_names]
Switches the primary node.
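For example, to make the n2 node the new primary of the shard-1 shard (names are illustrative):
$ shardmanctl shard -s shard-1 switch --new-primary n2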
shard start
Syntax:
shardmanctl [common_options] shard -s|--shard shard_name start [--no-wait]
Starts the shard.
-s shard_name
--shard=shard_name
Shard name.
--no-wait
Do not wait for the shard to start.
shard stop
Syntax:
shardmanctl [common_options] shard -s|--shard shard_name stop
Stops the shard.
-s shard_name
--shard=shard_name
Shard name.
shard replicas reinit
Syntax:
shardmanctl [common_options] shard -s|--shard shard_name replicas reinit [-n|--node node_names] [-y|--yes] [--no-wait]
Resets replicas of a specific shard.
-s shard_name
--shard=shard_name
Shard name.
-n node_names
--node=node_names
Specifies the node on which to reset replicas. If not specified, shard replicas are checked on all nodes.
-y
--yes
Confirm the operation instead of asking for approval from the standard input.
--no-wait
Do not wait for replicas to become ready.
For more details, see the section called “Reinitializing Replicas”.
nodes start
Syntax:
shardmanctl [common_options] nodes start -n|--nodes node_names [--no-wait]
Starts the nodes.
-n node_names
--nodes=node_names
Node names.
--no-wait
Do not wait for the nodes to start.
nodes restart
Syntax:
shardmanctl [common_options] nodes restart -n|--nodes node_names [--no-wait]
Restarts the nodes.
-n node_names
--nodes=node_names
Node names.
--no-wait
Do not wait for the nodes to restart.
nodes stop
Syntax:
shardmanctl [common_options] nodes stop -n|--nodes node_names [--no-wait]
Stops the nodes.
-n node_names
--nodes=node_names
Node names.
--no-wait
Do not wait for the nodes to stop.
status
Syntax:
shardmanctl [common_options] status [-f|--format table|json] [--filter store|metadata|shardmand|rg|master|dictionary|all] [-s|--sort node|rg|status]
Reports on the health status of Shardman cluster subsystems.
-f table|json
--format=table|json
Specifies the report format.
Default: table.
For more details, see the section called “Getting the Status of Cluster Subsystems”.
--filter store|metadata|shardmand|rg|master|dictionary|all
Specifies subsystems whose status information should be included in the output.
Default: all.
For more details, see the section called “Getting the Status of Cluster Subsystems”.
-s node|rg|status
--sort=node|rg|status
Sort messages inside one group (table) as specified.
Default: node.
For more details, see the section called “Getting the Status of Cluster Subsystems”.
status transactions
Syntax:
shardmanctl [common_options] status transactions [-r|--repgroup replication_group_name]
Shows distributed transactions that Shardman built-in monitoring tools failed to resolve.
-r replication_group_name
--repgroup=replication_group_name
Specifies the replication group for which to output transactions.
Default: all replication groups.
For more details, see the section called “Outputting the List of Unresolved Distributed Transactions”.
store dump
Syntax:
shardmanctl [common_options] store dump [-f|--file filename]
Dumps current cluster specifications from the store.
-f filename
--file=filename
Specifies the output file (- for stdout).
Default: -.
For more details, see the section called “Dumping All Keys from the Store to Debug Error Configuration”.
store lock
Syntax:
shardmanctl [common_options] store lock [-f|--format text|json]
Shows the current cluster meta lock information.
-f text|json
--format=text|json
Specifies the output format.
Default: text.
For more details, see the section called “Output Current Cluster Meta Lock Information”.
store get
Syntax:
shardmanctl [common_options] store get [[-a|--alias aliasname]|[-k|--key keyname]] [-f|--file filename]
Gets the specified key from the store.
-a aliasname
--alias=ladle|cluster|spec|stolonspec
Specifies the use of an alias instead of the full key name. Cannot be used with --key.
For more details, see the section called “Getting the Current stolon Specification”.
-k keyname
--key=keyname
Specifies the key to retrieve from the store. Cannot be used with --alias.
For more details, see the section called “Getting the Current stolon Specification”.
-f filename
--file=filename
Specifies the file to print the value to.
Default: - (stdout).
For more details, see the section called “Getting the Current stolon Specification”.
store keys
Syntax:
shardmanctl [common_options] store keys
Gets all keys with the current cluster prefix from the store.
For more details, see the section called “Getting the Cluster and Ladle Key Names For the Current Cluster”.
store set
Syntax:
shardmanctl [common_options] store set [[-a|--alias aliasname]|[-k|--key keyname]] [-f|--file filename]
Creates or rewrites a key in the store.
-a ladle|cluster|spec|stolonspec
--alias=ladle|cluster|spec|stolonspec
Specifies the use of an alias instead of the full key name. Cannot be used with --key.
-k keyname
--key=keyname
Specifies the key name to set in the store. Cannot be used with --alias.
-f filename
--file=filename
Specifies the file with input data (- for stdin).
For more details, see the section called “Setting a New Spec for the Cluster”.
tables sharded info
Syntax:
shardmanctl [common_options] tables sharded info [-t|--table table_name]
Gets information about a sharded table.
-t table
--table=table
Specifies the name of the table in the format schema.table.
tables sharded list
Syntax:
shardmanctl [common_options] tables sharded list
Gets the list of all sharded tables.
tables sharded norebalance
Syntax:
shardmanctl [common_options] tables sharded norebalance
Gets the list of sharded tables with automatic rebalancing disabled.
tables sharded partmove
Syntax:
shardmanctl [common_options] tables sharded partmove [-t|--table table_name] [-s|--shard shard_name] [-p|--partnum number]
Moves the specified partition of a sharded table to a new shard.
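For example, to move partition 5 of a sharded table to the shard-2 shard, one might run (the table and shard names are illustrative):
$ shardmanctl tables sharded partmove -t public.mytable -s shard-2 -p 5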
tables sharded rebalance
Syntax:
shardmanctl [common_options] tables sharded rebalance [-t|--table table_name]
Enables and runs automatic data rebalancing for the selected sharded table.
-t table
--table=table
Specifies the name of the table in the format schema.table.
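For example, to re-enable and run rebalancing for a single table (the table name is illustrative):
$ shardmanctl tables sharded rebalance -t public.mytable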
config get
Syntax:
shardmanctl [common_options] config get [-c|--choose-revision] [-r|--revision] [-f|--file]
Outputs the current full cluster specification or a configuration of the specified revision.
config revisions rm
Syntax:
shardmanctl [common_options] config revisions rm [-r|--revision] [-y|--yes]
Deletes a specified configuration revision from history.
config update
Syntax:
shardmanctl [common_options] config update [[-f|--file stolon_spec_file|shardman_spec_file]|spec_text [-p|--patch] [-w|--wait]] [--force]
Updates the stolon or full Shardman configuration.
-f stolon_spec_file|shardman_spec_file
--specfile=stolon_spec_file|shardman_spec_file
Specifies the file with the stolon or full Shardman configuration. The configuration file type is determined automatically. The value of - means the standard input. By default, the configuration is passed in spec_text.
-w
--wait
Sets shardmanctl to wait for configuration changes to take effect. If a new configuration cannot be loaded by all replication groups, shardmanctl will wait forever.
-p
--patch
Merge the new configuration into the existing one. By default, the new configuration replaces the existing one.
--force
Perform a forced update if a cluster operation is in progress.
config rollback
Syntax:
shardmanctl [common_options] config rollback [-r|--revision] [-w|--wait time_duration] [--force] [-y|--yes]
Rolls Shardman back to one of its previous configuration states. When rolling back to a configuration revision with different max_connections, max_prepared_transactions, or max_worker_processes parameters, the replicas are reinitialized.
-r
--revision
ID of the revision to roll back to. It is a timestamp of an operation that resulted in a Shardman configuration change. If not specified, the user is presented with a list of revisions to choose from.
-w
--wait
Sets shardmanctl to wait for configuration changes to take effect. If a new configuration cannot be loaded by all replication groups, shardmanctl will wait forever.
Default: 1h.
-f
--force
Perform a forced rollback if a cluster operation is in progress.
-y
--yes
Perform automatic confirmation.
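For example, a non-interactive rollback to a chosen revision might look like this (take the actual revision ID from the config revisions output):
$ shardmanctl config rollback -r <revision_id> -y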
config revisions
Syntax:
shardmanctl [common_options] config revisions [-f|--format text|json]
Outputs the revision history of the Shardman cluster configuration. It contains the following information for each revision:
revision_id — timestamp of the command that resulted in the Shardman cluster configuration change
host — name of the host from which this command was executed
user — user who executed this command
command — the command itself
-f text|json
--format=text|json
Specifies the output format.
Default: text.
config revisions set
Syntax:
shardmanctl [common_options] config revisions set [--keep-config-revisions]
Sets the length of the configuration revision history. The length cannot be lower than 5; lower values are automatically raised to 5. For Shardman clusters where the configuration revision history has not been collected yet, the length is automatically set to 20.
--keep-config-revisions
A limit on the number of revisions kept for one Shardman configuration. If the limit is lower than the current history length, the older revisions beyond this limit are deleted. Also, if the number of operations resulting in configuration changes exceeds the limit, the oldest revision is deleted.
Default: 20.
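For example, to limit the history to the ten most recent revisions (the value is illustrative):
$ shardmanctl config revisions set --keep-config-revisions=10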
config update ip
Syntax:
shardmanctl [common_options] config update ip [-u ip_1=ip_2,hostname_1=hostname_2] [-y|--yes]
Updates the specified node IPs in the cluster.
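For example, to replace one node address and confirm automatically (the addresses are illustrative):
$ shardmanctl config update ip -u 192.168.0.10=192.168.0.20 -y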
set
Syntax:
shardmanctl [common_options] set pgParam1=value1 [pgParam2=value2 [...]] [-y|--yes] [-w|--wait time_duration] [-f|--force]
Sets the values of the specified Shardman cluster database parameters.
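For example, to change one parameter and wait up to ten minutes for it to apply (the parameter and duration are illustrative):
$ shardmanctl set statement_timeout=60s -w 10m -y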
upgrade
Syntax:
shardmanctl [common_options] upgrade
Upgrades the shardman database extension and updates pg_foreign_server options.
bench init
Syntax:
shardmanctl [common_options] bench init [--schema-type single|simple|shardman|custom] [--schema-file file_name] [-s|--scale scale_value] [-n|--no-vacuum] [-F|--fillfactor fillfactor_value]
Initializes the benchmark schema via pgbench. The schema can be custom or predefined. Creates tpc-b schema tables and fills them.
--schema-type=single|simple|shardman|custom
Type of schema used by schema initialization. Possible values:
single — schema for a single PostgreSQL benchmark test
simple — simple sharded schema
shardman — sharded schema optimized for Shardman
custom — schema initialized by the user from the --schema-file file
Default schema: shardman.
--schema-file=file_name
File with the DDL query for the custom schema type, to be used to create tpc-b tables for pgbench: pgbench_accounts, pgbench_branches, pgbench_tellers, pgbench_history.
-s scale_value
--scale=scale_value
Multiply the number of generated rows by the given scale factor.
-n
--no-vacuum
Perform no vacuuming during initialization.
-F fillfactor_value
--fillfactor=fillfactor_value
Fill pgbench tables with the given fillfactor value.
bench run
Syntax:
shardmanctl [common_options] bench run [--schema-type single|simple|shardman|custom] [-f|--file file_name] [-c|--client client_value] [-C|--connect] [--full-output] [-j|--jobs jobs_value] [-T|--time seconds] [-t|--transactions transactions_value] [-s|--scale scale_factor] [-P|--progress seconds] [-R|--rate rate] [-M|--protocol querymode]
Runs the initialized benchmark via pgbench. Can use the default pgbench script or a custom script from a file.
--schema-type=single|simple|shardman|custom
Type of schema used by schema initialization (bench init). Possible values:
single — schema for a single PostgreSQL benchmark test
simple — simple sharded schema
shardman — sharded schema optimized for Shardman
custom — schema initialized by the user from the --schema-file file
Default schema: shardman.
-f file_name
--file=file_name
Add a transaction script read from file_name to the list of scripts to be executed. Optionally, write an integer weight after @ to adjust the probability of selecting this script versus other ones. The default weight is 1. (To use a script file name that includes an @ character, append a weight so that there is no ambiguity, for example filen@me@1.)
-c client_value
--client=client_value
Number of clients simulated, that is, number of concurrent database sessions.
-C
--connect
Establish a new connection for each transaction rather than doing it just once per client session.
--full-output
Print all pgbench output.
-j jobs_value
--jobs=jobs_value
Number of worker threads within pgbench.
-s scale_factor
--scale=scale_factor
Multiply the number of generated rows by the given scale factor.
-T seconds
--time=seconds
Run the test for this many seconds instead of a fixed number of transactions per client.
-t transactions_value
--transactions=transactions_value
Number of transactions each client runs.
Default: 10.
-P seconds
--progress=seconds
Show a progress report every sec seconds. The report includes the time since the beginning of the run, the TPS since the last report, and the transaction latency average, standard deviation, and the number of failed transactions since the last report. Under throttling (-R), the latency is computed with respect to the transaction scheduled start time, not the actual transaction beginning time, thus it also includes the average schedule lag time. When --max-tries is used to enable transaction retries after serialization/deadlock errors, the report includes the number of retried transactions and the sum of all retries.
-R rate
--rate=rate
Execute transactions targeting the specified rate instead of running as fast as possible (the default). The rate is given in transactions per second. If the targeted rate is above the maximum possible rate, the rate limit won't impact the results.
-M querymode
--protocol=querymode
Protocol to use for submitting queries to the server:
simple: use simple query protocol.
extended: use extended query protocol.
prepared: use extended query protocol with prepared statements.
In the prepared mode, pgbench reuses the parse analysis result starting from the second query iteration, so pgbench runs faster than in other modes.
Default: simple.
bench cleanup
Syntax:
shardmanctl [common_options] bench cleanup
Cleans up the schema database after benchmarks. Drops tpc-b tables.
bench generate
Syntax:
shardmanctl [common_options] bench generate [-c|--config file_name] [-o|--output-file file_name]
Gets the benchmark configuration from a file and generates a bash script that creates a schema optimized for Shardman and runs the benchmark using pgbench. The configuration file must be in yaml format.
-c file_name
--config=file_name
The configuration file path. The file contains a sequence of script configurations. Each script must have a schema_type: single|simple|shardman|custom. For a custom schema, it is necessary to specify the schema_file with the DDL script. Optional parameters: init_flags (default set: -s 1000), run_flags (default set: -n -P 10 -c 10 -j 4 -T 60), partitions (default value: 50). It is highly recommended to use the -n (--no-vacuum) parameter inside run_flags. Configuration file example:
benches:
  - schema_type: single
    init_flags: "-s 3"
    run_flags: "-n -P 10 -c 10 -j 4 -T 10"
  - schema_type: simple
    init_flags: "-s 4"
    run_flags: "-n -P 10 -c 20 -j 4 -T 10"
    partitions: 100
  - schema_type: shardman
    init_flags: "-s 5"
    run_flags: "-n -P 10 -c 20 -j 4 -T 10"
  - schema_type: custom
    init_flags: "-s 6"
    schema_file: "schema.psql"
-o file_name
--output-file=file_name
Output file. Default: stdout.
script
Syntax:
shardmanctl [common_options] script [-s|--shard shard_name] [[-f|--file file_name]|[--sql query]]
Executes non-transactional commands from a file or from the command line on the specified shards.
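For example, to run a single non-transactional command on one shard (the shard name and query are illustrative):
$ shardmanctl script --shard shard-1 --sql 'VACUUM'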
psql
Syntax:
shardmanctl [common_options] psql [-s|--shard shard_name]
Connects to the first available primary node if no options are specified.
-s shard_name
--shard=shard_name
Name of the shard. If specified, the connection is established with the current primary of this shard.
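For example, to open an interactive session on the current primary of a shard (the shard name is illustrative):
$ shardmanctl psql -s shard-1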
daemon set
Syntax:
shardmanctl [common_options] daemon set [--session-log-level debug|info|warn|error] [--session-log-format json|text] [--session-log-nodes]
Allows updating the log parameters “on the fly”.
--session-log-level debug|info|warn|error
Updates the log level to debug, info, warn, or error.
--session-log-format json|text
Updates the log output format to text or json.
--session-log-nodes
Specifies which cluster nodes must be updated. If not specified, the parameters are updated on every node.
Default: all nodes.
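For example, to switch all nodes to debug-level logging in JSON format:
$ shardmanctl daemon set --session-log-level debug --session-log-format json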
history
Syntax:
shardmanctl [common_options] history [-r|--reverse] [-f|--format json|text] [-l|--limit number_of_commands]
Shows the history of the commands that updated the cluster. By default, they are sorted from the most recent to the oldest ones.
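For example, to show the ten most recent cluster-changing commands in text format:
$ shardmanctl history -l 10 -f text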
Common Options
shardmanctl common options are optional parameters that are not specific to a particular command. They specify etcd connection settings, the cluster name, and a few more settings. By default, shardmanctl tries to connect to the etcd store at 127.0.0.1:2379 and uses the cluster0 cluster name. The default log level is info.
-h, --help
Show brief usage information.
--cluster-name cluster_name
Specifies the name of the cluster to operate on. The default is cluster0.
--log-level level
Specifies the log verbosity. Possible values of level are (from minimum to maximum): error, warn, info, and debug. The default is info.
--retries number
Specifies how many times shardmanctl retries a failing etcd request. If an etcd request fails, most likely due to a connectivity issue, shardmanctl retries it the specified number of times before reporting an error. The default is 5.
--session-timeout seconds
Specifies the session timeout for shardmanctl locks. If there is no connectivity between shardmanctl and the etcd store for the specified number of seconds, the lock is released. The default is 30.
--store-endpoints string
Specifies the etcd address in the format http[s]://address[:port](,http[s]://address[:port])*. The default is http://127.0.0.1:2379.
--store-ca-file string
Verify the certificate of the HTTPS-enabled etcd store server using this CA bundle.
--store-cert-file string
Specifies the certificate file for client identification by the etcd store.
--store-key string
Specifies the private key file for client identification by the etcd store.
--store-timeout duration
Specifies the timeout for an etcd request. The default is 5 seconds.
--monitor-port number
Specifies the port for the shardmand HTTP server for metrics and probes. The default is 15432.
--api-port number
Specifies the port for the shardmand HTTP API server. The default is 15432.
--version
Show shardman-utils version information.
Environment
SDM_BACKUP_MODE
An alternative to setting the --backup-mode option.
SDM_BACKUP_PATH
An alternative to setting the --backup-path option.
SDM_CLUSTER_NAME
An alternative to setting the --cluster-name option.
SDM_ETCD_PATH
An alternative to setting the --etcd-path option.
SDM_FILE
An alternative to setting the --file option for config update.
SDM_LOG_LEVEL
An alternative to setting the --log-level option.
SDM_NODES
An alternative to setting the --nodes option for nodes add and nodes rm.
SDM_RETRIES
An alternative to setting the --retries option.
SDM_SPEC_FILE
An alternative to setting the --spec-file option for init.
SDM_STORE_ENDPOINTS
An alternative to setting the --store-endpoints option.
SDM_STORE_CA_FILE
An alternative to setting the --store-ca-file option.
SDM_STORE_CERT_FILE
An alternative to setting the --store-cert-file option.
SDM_STORE_KEY
An alternative to setting the --store-key option.
SDM_STORE_TIMEOUT
An alternative to setting the --store-timeout option.
SDM_SESSION_TIMEOUT
An alternative to setting the --session-timeout option.
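These variables make repetitive invocations shorter. For example, a session might export the store address and cluster name once instead of passing them to every command (the endpoint is illustrative):
export SDM_STORE_ENDPOINTS=http://etcdserver:2379
export SDM_CLUSTER_NAME=cluster0
shardmanctl status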
Usage
Adding Nodes to a Shardman Cluster
To add nodes to a Shardman cluster, run the following command:
shardmanctl [common_options] nodes add -n|--nodes node_names
You must specify the -n (--nodes) option to pass the comma-separated list of nodes to be added. Nodes can be referred to by their hostname or IP address. Hostnames must be correctly resolved on all nodes.
If the nodes add command fails during execution, use the cleanup --after-node-operation command to fix possible cluster configuration issues.
Performing Cleanup
By default, cleanup
operates in the report-only mode, that is, the following command will only show actions to be done during actual cleanup:
shardmanctl [common_options] cleanup --after-node-operation|--after-rebalance
To perform the actual cleanup, run the following command:
shardmanctl [common_options] cleanup -p|--processrepgroups --after-node-operation|--after-rebalance
Displaying the Cluster Topology
cluster topology
displays the current cluster topology. The default is the table mode. All cluster nodes will be grouped by the replication groups they belong to. For each node, its status will be displayed.
shardmanctl [common_options] cluster topology -f|--format table|json|text
Checking shardmand Service on Nodes
daemon check
not only checks that the shardmand service is running on the specified nodes, but also ensures that those services are configured for the same cluster as shardmanctl:
shardmanctl [common_options] daemon check -n|--nodes node_names
Removing Nodes from a Shardman cluster
To remove nodes from a Shardman cluster, run the following command:
shardmanctl [common_options] nodes rm -n|--nodes node_names
Specify the -n (--nodes) option to pass the comma-separated list of nodes to be removed. The command recreates all partitions of sharded tables.
Note
Do not use the cleanup command to fix possible cluster configuration issues after a failure of nodes rm. Redo the nodes rm command instead.
If you want to remove all nodes in a cluster and do not care about the data, simply reinitialize the cluster. If a removed replication group contains local (non-sharded and non-global) tables, their data is silently lost after the replication group removal.
Getting the Status of Cluster Subsystems
To get a report on the health status of a Shardman cluster in a table format for the metadata and store subsystems sorted by replication group, run the following command:
shardmanctl [common_options] status --filter=metadata,store --sort=rg
To get the report in JSON format, use the -f|--format=json option (omitted above since the table format is used by default). Each detected issue is reported with an Unknown, Warning, Error, or Fatal status. The tool can also report an Operational error, which means there was an issue during the cluster health check. When the command encounters a Fatal or Operational error, it stops further diagnostics. For example, an inconsistency in the store metadata does not allow correct cluster operations and must be handled first.
Outputting the List of Unresolved Distributed Transactions
To view the list of distributed transactions that Shardman built-in monitoring tools failed to resolve, run the following command:
shardmanctl [common_options] status transactions -r|--repgroup replication_group_name
Each output transaction consists of tx_id (transaction ID), coordinator_id, creation_time, and description (error or transaction status). To display the list of transactions for a specific replication group, use the -r|--repgroup option (all replication groups are included by default). If there are no such transactions, a null value is returned in JSON.
Dumping All Keys from the Store to Debug Error Configuration
After facing an error while using a Shardman cluster, to fill in an exhaustive report, it is convenient to dump all specifications that could produce such an error with the following command:
shardmanctl [common_options] store dump -f|--file filename
Some harmless errors may be shown, but they will not interrupt dumping. If you do not specify the filename, the dump is sent to stdout and may pollute your terminal.
Getting the Current stolon Specification
To get the current stolon specification, which is normally a part of the cluster key in the store, use the following command:
shardmanctl [common_options] store get -a|--alias stolonspec -f|--file filename
If the cluster key itself is corrupted, the stolon specification will not be shown either. Instead of using the alias, you may also find out the full cluster data key name (by listing all keys with the store keys command), use store get to retrieve it, and find the stolon part there. Mind that with the latter approach, the shardman.config_uuid parameter is not deleted, which may result in a conflict in later use of this data; to manipulate the stolon specification, it is recommended to use the shardmanctl store get -a stolonspec command.
Getting the Cluster and Ladle Key Names For the Current Cluster
To get all key names in the store at once, run the following command:
shardmanctl [common_options] store keys
The output is only shown in JSON format. It also prints alias names for keys that have them (excluding stolonspec and spec, since they are parts of other keys).
Output Current Cluster Meta Lock Information
You can view information about current cluster meta locks acquired by any command:
shardmanctl [common_options] store lock -f|--format json
To get the report in JSON format, use the -f|--format=json option (omitted above since the text format is used by default). If the lock does not exist, Lock not found is returned.
Setting a New Spec for the Cluster
To set a new spec part of the cluster specification, run the following command:
shardmanctl [common_options] store set --alias=spec --file=spec.json
Since spec is a part of the cluster data key, it cannot be set with --key. If the provided file is not valid JSON, the new spec part will not be set.
Backing up a Shardman Cluster
Requirements for backing up and restoring a Shardman cluster using the backup command are listed in Section 2.6.1.1.
To back up a Shardman cluster, you can run the following command:
shardmanctl [common_options] backup --datadir directory [--use-ssh]
You must pass the directory to write the output to through the --datadir option. You can limit the number of concurrent tasks (pg_receivewal or pg_basebackup commands) by passing the limit through the --maxtasks option.
If --use-ssh is specified, the shardmanctl recover command will use the scp command to restore data. This allows using a backup repository on the local host.
Registering a Shardman Cluster
To register a Shardman cluster in the etcd store, run the following command:
shardmanctl [common_options] init [-y|--yes] [-f|--spec-file spec_file_name]|spec_text
You must provide the string with the cluster specification. You can do it as follows:
On the command line — do not specify the -f option and pass the string in spec_text.
On the standard input — specify the -f option and pass - in spec_file_name.
In a file — specify the -f option and pass the filename in spec_file_name.
Restoring a Shardman Cluster
shardmanctl can perform a full, metadata-only, or schema-only restore of a Shardman cluster from a backup created by the backup command.
To perform a full restore, you can run the following command:
shardmanctl [common_options] recover --info file
Pass the file to load information about the backup from through the --info option. In most cases, set this option to point to the backup_info file in the backup directory or to its modified copy.
If you encounter issues with an etcd instance, it makes sense to perform a metadata-only restore. To do this, you can run the following command:
shardmanctl [common_options] recover --dumpfile file --metadata-only
You must pass the file to load the etcd metadata dump from through the --dumpfile option.
If you need to restore only schema information (tables, roles, and so on), specify the --schema-only option.
For all kinds of restore, you can specify --timeout for the tool to exit with an error after waiting the specified number of seconds for the cluster to become ready or the recovery to complete.
You can specify the --shard parameter to restore only a single shard.
Before running the recover command, specify DataRestoreCommand and RestoreCommand in the backup_info file. DataRestoreCommand fetches the base backup and restores it to the stolon data directory. RestoreCommand fetches the WAL file and saves it to the stolon pg_wal directory. These commands can use substitutions, as shown in the examples below.
The stolon keeper thread runs both commands on each node in the cluster. Therefore:
Make the backup accessible to these nodes (for example, by storing it in a shared filesystem or by using a remote copy protocol, such as SFTP).
Commands to fetch the backup are executed as the operating system user under which stolon daemons work (usually postgres), so set the permissions for the backup files appropriately.
These examples show how to specify RestoreCommand and DataRestoreCommand:
If a backup is available through passwordless SCP, you can use:
"DataRestoreCommand": "scp -r user@host:/var/backup/shardman/%s/backup/* %p",
"RestoreCommand": "scp user@host:/var/backup/shardman/%s/wal/%f %p"
If a backup is stored on NFS and available through the /var/backup/shardman path, you can use:
"DataRestoreCommand": "cp -r /var/backup/shardman/%s/backup/* %p",
"RestoreCommand": "cp /var/backup/shardman/%s/wal/%f %p"
Backing up a Shardman Cluster Using the probackup Command
Requirements for backing up and restoring a Shardman cluster using the probackup command are listed in Section 2.6.3.1.
For example, following these requirements, on the backup host:
groupadd postgres
useradd -m -N -g postgres -r -d /var/lib/postgresql -s /bin/bash
Then add SSH keys to provide a passwordless SSH connection between the backup host and the Shardman cluster hosts. Then on the backup host:
apt-get install pg-probackup shardman-utils
mkdir -p directory
chown postgres:postgres directory -R
shardmanctl [common_options] probackup init --backup-path=directory --etcd-path=directory/etcd --remote-user=postgres --remote-port=22
shardmanctl [common_options] probackup archive-command --backup-path=directory --remote-user=postgres --remote-port=22
If all the requirements are met, then run the backup subcommand for the cluster backup:
shardmanctl [common_options] probackup backup --backup-path=directory --etcd-path=directory --backup-mode=MODE
You must pass the directories through the --backup-path and --etcd-path options and the backup mode through --backup-mode. Full and delta backups are available with the FULL, DELTA, PTRACK, and PAGE values. It is also possible to specify backup compression options through the --compress, --compress-algorithm, and --compress-level flags, as well as the --remote-port and --remote-user flags. You can limit the number of concurrent tasks when doing a backup by passing the limit through the --maxtasks flag.
By default, data is copied via SSH to create a backup. To copy data to a mounted partition instead, use the --storage-type option with the mount value. This value will be automatically used in the restore process.
You can also copy data to an S3-compatible object storage. To do this, use the --storage-type option with the S3 value. When this value is used, you must specify the directory for pg_probackup logs. You can do it either by specifying --log-directory for each command or by setting the SDM_LOG_DIRECTORY environment variable, for example:
export SDM_LOG_DIRECTORY=/backup/logs
If you are going to perform backup/restore only for an S3-compatible object storage, you can also set an environment variable instead of specifying --storage-type in each probackup command:
export SDM_STORAGE_TYPE=S3
Restoring a Shardman Cluster Using the probackup Command
shardmanctl in probackup mode can perform a full, metadata-only, or schema-only restore of a Shardman cluster from a backup created by the probackup backup command.
To perform a full or partial restore, first select the backup to restore from. To show the list of available backups, run the following command:
shardmanctl [common_options] probackup show --backup-dir=path --format=format [--archive] [-i|--backup-id backup-id] [--instance instance]
The output is a list of backups with their IDs in a table or JSON format. Pick the needed backup ID and run the probackup restore command:
shardmanctl [common_options] probackup restore --backup-dir=path --backup-id=id
Pass the path to the repository through the --backup-dir option and the backup ID through the --backup-id flag.
If you encounter issues with an etcd instance, it makes sense to perform a metadata-only restore. To do this, you can run the following command:
shardmanctl [common_options] probackup restore --backup-dir=path --backup-id=id --metadata-only
If you need to restore only schema information (tables, roles, and so on), specify the --schema-only option.
For both kinds of restore, you can specify --timeout for the tool to exit with an error after waiting the specified number of seconds for the cluster to become ready or the recovery to complete.
You can specify the --shard parameter to restore only a single shard.
You can also specify the --recovery-target-time option for point-in-time recovery. In this case, Shardman finds the syncpoint closest to the specified timestamp and suggests restoring to the found LSN. You can also specify --wal-limit to limit the number of WAL segments to be processed.
Important
Before restoring a Shardman cluster, make sure that the cluster is up by executing the shardmanctl status
command. If the output shows errors, performing the restore can result in the cluster becoming unavailable. First, fix the errors by reinitializing the cluster and restoring the etcd metadata. Then you can proceed to restoring the cluster from backup.
Reinitializing Replicas
If replicas are in an incorrect state, you can reset them using the shardmanctl command:
shardmanctl [common_options] shard --shard=shard_name replicas reinit
This command determines the nodes on which replicas of the specified shard are running and sends a request to shardmand on these nodes. After receiving this request, shardmand clears the postgres data directory and restarts the keeper
thread that is responsible for managing the replica. After that, the replicas are restarted and begin to receive data from the corresponding primary.
Examples
Initializing the Cluster
To initialize a Shardman cluster that has the cluster0 name and uses an etcd cluster consisting of the n1, n2, and n3 nodes listening on port 2379, ensure proper settings in the sdmspec.json spec file and run:
$
shardmanctl --store-endpoints http://n1:2379,http://n2:2379,http://n3:2379 init -f sdmspec.json
Getting the Cluster Connection String
To get the connection string for a Shardman cluster that has the cluster0 name and uses an etcd cluster consisting of the n1, n2, and n3 nodes listening on port 2379, run:
$
shardmanctl --store-endpoints http://n1:2379,http://n2:2379,http://n3:2379 getconnstr
dbname=postgres host=n1,n4,n2,n1,n1,n2,n4,n3 password=yourpasswordhere port=5432,5433,5432,5433,5432,5433,5432,5433 user=postgres
To add replicas to getconnstr, use --all.
Getting the Cluster Status
Here is a sample status output from shardmanctl with OK and Error statuses:
$
shardmanctl status --filter store,shardmand,rg --sort=node
┌──────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ == STORE STATUS == │ ├────────────┬─────────────────────────────────────────────┬───────────────────────┬───────────────────────┤ │ STATUS │ MESSAGE │ REPLICATION GROUP │ NODE │ ├────────────┼─────────────────────────────────────────────┼───────────────────────┼───────────────────────┤ │ OK │ etcd store is OK │ │ │ └────────────┴─────────────────────────────────────────────┴───────────────────────┴───────────────────────┘ ┌──────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ == SHARDMAND STATUS == │ ├────────────┬─────────────────────────────────────────────┬───────────────────────┬───────────────────────┤ │ STATUS │ MESSAGE │ REPLICATION GROUP │ NODE │ ├────────────┼─────────────────────────────────────────────┼───────────────────────┼───────────────────────┤ │ OK │ shardmand on node 56d819b4e9e4 is OK │ │ 56d819b4e9e4 │ ├────────────┼─────────────────────────────────────────────┼───────────────────────┼───────────────────────┤ │ OK │ shardmand on node 6d0aabd50acc is OK │ │ 6d0aabd50acc │ └────────────┴─────────────────────────────────────────────┴───────────────────────┴───────────────────────┘ ┌───────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ == REPLICATION GROUP STATUS == │ ├────────────┬──────────────────────────────────────────────┬───────────────────────┬───────────────────────┤ │ STATUS │ MESSAGE │ REPLICATION GROUP │ NODE │ ├────────────┼──────────────────────────────────────────────┼───────────────────────┼───────────────────────┤ │ OK │ Replication group clover-1-56d819b4e9e4 is │ clover-1-56d819b4e9e4 │ │ │ │ OK │ │ │ ├────────────┼──────────────────────────────────────────────┼───────────────────────┼───────────────────────┤ │ │ Replication connection is down for slave │ │ │ │ Error │ 6d0aabd50acc:5442 in replication group │ clover-1-6d0aabd50acc │ 6d0aabd50acc:5442 │ │ │ clover-1-6d0aabd50acc │ │ │ └────────────┴──────────────────────────────────────────────┴───────────────────────┴───────────────────────┘
Rewriting stolon Specification
First, get the list of available keys in the store using the following command:
$
shardmanctl store keys
{ "Key": "shardman/cluster0/data/cluster", "Alias": "cluster" }{ "Key": "shardman/cluster0/data/shardmand/56d819b4e9e4" }{ ... "Key": "shardman/cluster0/stolon/remoteLogs/6d0aabd50acc/clover-1-6d0aabd50acc/keeper_1/error" }
Get the stolon configuration from the store and save it in the stolonspec.json file with the command:
$
shardmanctl store get -a stolonspec -f stolonspec.json
Apply the necessary changes to the file and upload the new specification using shardmanctl config update. Mind that the shardman.config_uuid parameter is deleted with shardmanctl store get -a stolonspec and not with shardmanctl store get -k full/path/to/clusterspec; using a spec with an existing shardman.config_uuid will result in a conflict.
Important
Do not use the store set command to update cluster configurations because it does not apply a new specification on all nodes; it only writes it to the store. For the above example with the stolon specification, shardmanctl config update is acceptable.
To double-check, you can get the cluster key with the new StolonSpec by the full key name (which was shown earlier by the store keys command):
$
shardmanctl store get -k shardman/cluster0/data/cluster
{ "FormatVersion": 1, "Spec": { "PgSuAuthMethod": "md5", "PgSuPassword": "12345", "PgSuUsername": "postgres", "PgReplAuthMethod": "md5", "PgReplPassword": "12345", "PgReplUsername": "repluser", "ShardSpec": { ... }
Adding Nodes to the Cluster
To add the n1, n2, n3, and n4 nodes to the cluster, run:
$
shardmanctl --store-endpoints http://n1:2379,http://n2:2379,http://n3:2379 nodes add -n n1,n2,n3,n4
Important
The number of nodes being added must be a multiple of Repfactor + 1 if the cross placement policy is used.
Removing Nodes from the Cluster
To remove the n1 and n2 nodes from the cluster0 cluster, run:
$
shardmanctl --store-endpoints http://n1:2379,http://n2:2379,http://n3:2379 nodes rm -n n1,n2
If the cross placement policy is used, the clovers that contain these nodes are deleted along with them.
Executing a Query on All Replication Groups
To execute the select version() query on all replication groups, run:
$
shardmanctl --store-endpoints http://n1:2379,http://n2:2379,http://n3:2379 forall --sql 'select version()'
Node 1 says: [PostgreSQL 13.1 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0, 64-bit] Node 4 says: [PostgreSQL 13.1 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0, 64-bit] Node 3 says: [PostgreSQL 13.1 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0, 64-bit] Node 2 says: [PostgreSQL 13.1 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0, 64-bit]
Performing Rebalance
To rebalance sharded tables in the cluster0 cluster, run:
$
shardmanctl --store-endpoints http://n1:2379,http://n2:2379,http://n3:2379 rebalance
Updating PostgreSQL Configuration Settings
To set the max_connections parameter to 200 in the cluster, create a spec file (for instance, ~/stolon.json) with the following contents:
{ "pgParameters": { "max_connections": "200" } }
Then run:
$
shardmanctl --store-endpoints http://n1:2379,http://n2:2379,http://n3:2379 config update -p -f ~/stolon.json
Since changing max_connections requires a restart, DBMS instances are restarted by this command.
Performing Backup and Recovery
To create a backup of the cluster0 cluster using etcd at etcdserver listening on port 2379 and store it in the local /var/backup/shardman directory, run:
$
shardmanctl --store-endpoints http://etcdserver:2379 backup --datadir=/var/backup/shardman --use-ssh
Assume that you are performing a recovery from a backup to the cluster0 cluster using etcd at etcdserver listening on port 2379, and you take the backup description from the /var/backup/shardman/backup_info file. Edit the /var/backup/shardman/backup_info file, set DataRestoreCommand and RestoreCommand as necessary, and run:
$
shardmanctl --store-endpoints http://etcdserver:2379 recover --info /var/backup/shardman/backup_info
For metadata-only restore, run:
$
shardmanctl --store-endpoints http://etcdserver:2379 recover --metadata-only --dumpfile /var/backup/shardman/etcd_dump
For schema-only restore, run:
$
shardmanctl --store-endpoints http://etcdserver:2379 recover --schema-only --dumpfile /var/backup/shardman/etcd_dump
For single shard restore, run:
$
shardmanctl --store-endpoints http://etcdserver:2379 recover --info /var/backup/shardman/backup_info --shard shard_1
Performing Backup and Recovery with probackup Command
To create a backup of the cluster0 cluster using etcd at etcdserver listening on port 2379 and store it in the local /var/backup/shardman directory, first initialize the backup repository with the init subcommand:
$
shardmanctl --store-endpoints http://etcdserver:2379 probackup init --backup-path=/var/backup/shardman --etcd-path=/var/backup/etcd_dump
Then add and enable archive_command with the archive-command subcommand:
$
shardmanctl --store-endpoints http://etcdserver:2379 probackup archive-command add --backup-path=/var/backup/shardman
If the repository is successfully initialized and archive_command is successfully added, create a FULL backup with the backup subcommand:
$
shardmanctl --store-endpoints http://etcdserver:2379 probackup backup --backup-path=/var/backup/shardman --etcd-path=/var/backup/etcd_dump --backup-mode=FULL --compress --compress-algorithm=zlib --compress-level=5
To create a DELTA, PTRACK, or PAGE backup, run the backup subcommand with the DELTA, PTRACK, or PAGE value of the --backup-mode option:
$
shardmanctl --store-endpoints http://etcdserver:2379 probackup backup --backup-path=/var/backup/shardman --etcd-path=/var/backup/etcd_dump --backup-mode=DELTA --compress --compress-algorithm=zlib --compress-level=5
To show the created backup ID, run the show subcommand:
$
shardmanctl --store-endpoints http://etcdserver:2379 probackup show --backup-path=/var/backup/shardman --format=table
┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ == BACKUP ID 'S88FRO' │ │ == │ ├───────────────────────────────────┬───────────────────────────────────┬───────────────────────────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┤ │ INSTANCE │ HOST │ RECOVERY TIME │ MODE │ WAL MODE │ TLI │ DATA │ WAL │ Z-RATIO │ START LSN │ STOP LSN │ STATUS │ ├───────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ │ shard-1 │ n1 │ 2024-02-02 14:19:05+00 │ FULL │ ARCHIVE │ 1/0 │ 42.37MiB │ 16MiB │ 1.00 │ 0/C000028 │ 0/D0018B0 │ OK │ ├───────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ │ shard-2 │ n2 │ 2024-02-02 14:19:05+00 │ FULL │ ARCHIVE │ 1/0 │ 42.38MiB │ 16MiB │ 1.00 │ 0/C000028 │ 0/D001E00 │ OK │ └───────────────────────────────────┴───────────────────────────────────┴───────────────────────────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┘
In the PTRACK backup mode, Shardman tracks page changes on the fly. Continuous archiving is not necessary for it to operate. Each time a relation page is updated, this page is marked in a special PTRACK bitmap. Tracking implies some minor overhead on the database server operation, but speeds up incremental backups significantly.
If you are going to use PTRACK backups, complete the following additional steps:
Preload the ptrack shared library on each node. This can be done by adding the ptrack value to the shared_preload_libraries parameter.
Create the PTRACK extension on each cluster node:
$
shardmanctl --store-endpoints http://etcdserver:2379 forall --sql "create extension ptrack"
To enable tracking page updates, set the ptrack.map_size parameter as follows:
$
shardmanctl --store-endpoints http://etcdserver:2379 update '{"pgParameters":{"ptrack.map_size":"64"}}'
For optimal performance, it is recommended to set ptrack.map_size to N/1024, where N is the maximum size of the cluster node, in MB. If you set this parameter to a lower value, PTRACK is more likely to map several blocks together, which leads to false-positive results when tracking changed blocks and increases the incremental backup size, as unchanged blocks can also be copied into the incremental backup. Setting ptrack.map_size to a higher value does not affect PTRACK operation, but it is not recommended to set this parameter to a value higher than 1024.
To validate the created backup, run the validate subcommand:
$
shardmanctl --store-endpoints http://etcdserver:2379 probackup validate --backup-path=/var/backup/shardman --backup-id=RFP1FI
Assume that you are performing a recovery from a backup to the cluster0 cluster using etcd at etcdserver listening on port 2379, and you take the backup ID from the show command:
$
shardmanctl --store-endpoints http://etcdserver:2379 probackup restore --backup-path=/var/backup/shardman --backup-id=RFP1FI
Finally, re-enable archive_command:
$
shardmanctl --store-endpoints http://etcdserver:2379 probackup archive-command add --backup-path=/var/backup/shardman
For metadata-only restore, run:
$
shardmanctl --store-endpoints http://etcdserver:2379 probackup restore --metadata-only --backup-path=/var/backup/shardman --backup-id=RFP1FI
For schema-only restore, run:
$
shardmanctl --store-endpoints http://etcdserver:2379 probackup restore --schema-only --backup-path=/var/backup/shardman --backup-id=RFP1FI
For single shard restore, run:
$
shardmanctl --store-endpoints http://etcdserver:2379 probackup restore --backup-path=/var/backup/shardman --backup-id=RFP1FI --shard shard_1
For Point-in-Time Recovery, run:
$
shardmanctl --store-endpoints http://etcdserver:2379 probackup restore --metadata-only --backup-path=/var/backup/shardman --backup-id=RFP1FI --recovery-target-time='2006-01-02 15:04:05' -s
Loading Data from a Text File
To load data into a Shardman cluster, run the following command:
$
shardmanctl --store-endpoints http://etcdserver:2379 load --file=/var/load/data.tsv --table=mytable --source file --format text -j 8
In this example, data is loaded from the /var/load/data.tsv
data file (tab-delimited) into the table mytable
in 8 parallel threads. You can use schema.table
as the table name.
Loading Data from a PostgreSQL Table
To load data into a Shardman cluster from a PostgreSQL table, run the following command:
$
shardmanctl --store-endpoints http://etcdserver:2379 load -t desttable --source postgres --source-connstr "dbname=db host=srchost port=srcport user=login password=passwd" --source-table sourcetable -j 8
In this example, data is loaded from the table sourcetable
into the desttable
table in 8 parallel threads. You can use schema.table
as table names.
Loading Data with a Schema from PostgreSQL
To load data with a schema into Shardman cluster from PostgreSQL, run the following command:
$
shardmanctl --store-endpoints http://etcdserver:2379 load --schema load_schema.yaml
The load_schema.yaml file has the following format:
version: "1.0" migrate: connstr: "dbname=workdb host=workhost port=workport user=workuser password=workpassword" jobs: 8 batch: 1000 options: - create_schema - create_table - create_index - create_sequence - create_foreign_key - create_role - copy_ownership - copy_grants - truncate_table - skip_no_pkey_tables - skip_create_index_error - skip_create_extension_error - skip_load_errors - skip_create_foreign_key_error - skip_create_role_error - skip_copy_grants_error - skip_copy_ownership_error schemas: - name: public all: false tables: - name: tab1 type: sharded partitions: 6 distributedby: id priority: 3 - name: tab2 type: global - name: tab3 type: sharded partitions: 6 distributedby: field_id colocatewith: tab1 - name: table4 type: global source: schema.view source_pk: field_id - name: table5 type: global source: schema.func(arg) source_pk: field_id - name: schema2 all: false default_type: sharded default_partitions: 6 tables: - name: table1 distributedby: field_id priority: 2 - name: table2 type: global - name: table3 source: schema.view distributedby: field_id priority: 3 - name: table4 distributedby: field_id source: schema.func(arg) - name: table5 source: schema."complex.""table.name" distributedby: field_id - name: schema3 all: true skip_tables: [table1, table2, table3] roles: - name: test_user1 password: test_password - name: test_user2
The migrate.jobs
value defines the number of parallel data loader processes.
The migrate.batch
value is the number of rows in one batch (recommended value is 1000).
The migrate.schemas
section defines an array of source database schemas that you are working with. All other schemas will be skipped.
If the all value is set to true, then all tables from the current schema will be migrated (with the global type by default). If a table is listed in the migrate.schemas.tables array, then the target table type must be explicitly specified for it. Two types of tables are currently supported: global and sharded. Global tables are loaded first, then sharded tables, and at the end sharded tables with the colocatewith parameter. The order of loading tables of the same type can be changed using the priority option.
The migrate.schemas.skip_tables section defines an array of table names that will be skipped when the schema is loaded, even if the all parameter is set to true.
For sharded tables, the following attributes must be set: distributedby
(specifies the name of the column to use for the table partitioning) and partitions
(number of partitions that will be created for this table). Optionally, for sharded tables colocatewith
attribute can be set (name of the table to colocate with). Shardman will try to place partitions of the created table with the same partition key on the same nodes as the corresponding partitions of the table specified by colocatewith
.
You can specify the table default_type
option for a schema: global
or sharded
(default: global
). For the sharded
type you can also specify the default_partitions
option (default: 20). If you set default_type
to sharded
, you need to specify the distributedby
option for each table.
The source
option for a table should include the schema and table source: schema.source
. The source can be a table, view or function. For example: public.table
, public.view
, public.func(arg)
. If you set the source
view or function for a global table, you should specify source_pk
to set the primary key for this table. If source
is not specified or contains the name of a table, you can also specify source_pk
to create a primary key or override the existing one.
The priority
option for table determines the order in which the tables of the same type are loaded. Tables with higher priority
are loaded earlier. Default priority
value is 0.
The migrate.roles
section defines an array of role names and passwords that will be copied from the source database if create_role
is specified.
The schema supports the following options:
create_schema — create database schemas if they do not exist.
create_table — create tables if they do not exist.
create_index — create indexes after creating tables.
create_sequence — create sequences if they do not exist.
create_foreign_key — create foreign keys after creating tables.
truncate_table — truncate tables before data load.
create_role — create global roles defined in migrate.roles and copy role parameters from the source database.
copy_grants — copy access privileges from the source database.
copy_ownership — change table owners to match the owners in the source database.
skip_no_pkey_tables — skip tables without primary keys.
skip_create_index_error — skip index creation errors.
skip_create_extension_error — skip extension creation errors.
skip_load_errors — continue loading if errors occur.
skip_create_foreign_key_error — skip foreign key creation errors.
skip_create_role_error — skip role creation errors.
skip_copy_ownership_error — skip table owner changing errors.
skip_copy_grants_error — skip errors when copying access privileges from the source database.
Initialization and Running Benchmarks
To initialize a benchmark via shardmanctl using pgbench with the shardman schema, scale=1000, and partitions=40, run:
$
shardmanctl bench init --schema-type=shardman --scale=1000 --partitions=40
To run an initialized benchmark for the same shardman schema with jobs=4, clients=10, a duration of 60 seconds, and full pgbench output, use:
$
shardmanctl bench run --schema-type=shardman --jobs=4 --client=10 --time=60 --full-output
To initialize a benchmark with the custom schema from the schema.psql file with scale=1000, run:
$
shardmanctl bench init --schema-type=custom --schema-file=schema.psql --scale=1000
To run an initialized benchmark with the custom schema and a custom transaction script from script.psql with jobs=4, clients=10, and a duration of 60 seconds, use:
$
shardmanctl bench run --schema-type=custom --file=script.psql --jobs=4 --client=10 --time=60
To clean up a PostgreSQL database of tpc-b tables, use:
$
shardmanctl bench cleanup
Benchmark Generation Scripts
To generate a benchmark sequence via shardmanctl from the cfg.yaml config file and output the result to script.sh, run:
$
shardmanctl bench generate --config=cfg.yaml --output-file=script.sh
Configuration file example:
benches:
  - schema_type: single
    init_flags: "-s 3"
    run_flags: "-n -P 10 -c 10 -j 4 -T 10"
  - schema_type: simple
    init_flags: "-s 4"
    run_flags: "-n -P 10 -c 20 -j 4 -T 10"
    partitions: 100
  - schema_type: shardman
    init_flags: "-s 5"
    run_flags: "-n -P 10 -c 20 -j 4 -T 10"
  - schema_type: custom
    init_flags: "-s 6"
    schema_file: "schema.psql"