#
# This is an excerpt from a subclass of PostgresNode
#

# Generate backtraces and optionally core files for all user backends and
# walsenders associated with this node. Requires gdb to be present. Cores
# will be labeled by node name.
#
# Keyword arguments (all optional):
#
#   pids                 Processes to inspect: an arrayref of pids, or a scalar
#                        string of whitespace/newline separated pids. When
#                        omitted, every child of the postmaster (found with
#                        pgrep) plus the postmaster itself is inspected.
#   print_exprs          Arrayref of extra gdb expressions to print.
#   want_cores           If true, also write a core with gdb's "gcore".
#                        Default 0.
#   core_name_pattern    Core file path. Default 'core.{{pid}}'.
#   gdb_logfile_pattern  If non-empty, gdb's stdout+stderr go to this file
#                        instead of this process's stdout. Default ''.
#   backtrace_timeout_s  Seconds allowed for gdb to collect the backtrace.
#                        Default 60.
#   core_timeout_s       Extra seconds allowed when want_cores is set.
#                        Default 60.
#
# Both patterns may contain {{pmpid}}, {{pid}} and {{name}}, replaced with the
# postmaster pid, the inspected pid and the node name respectively.
#
# Dies if 'pids' is neither undef, an arrayref nor a plain scalar. A gdb run
# that fails or times out is reported, gdb is killed, and the remaining pids
# are still processed. Returns nothing useful.
sub gdb_backends {
    my ($self, %kwargs) = @_;

    $kwargs{backtrace_timeout_s} //= '60';
    $kwargs{core_timeout_s}      //= '60';
    $kwargs{want_cores}          //= 0;
    $kwargs{core_name_pattern}   //= 'core.{{pid}}';
    $kwargs{gdb_logfile_pattern} //= '';

    my $postmaster_pid = $self->{_pid};
    my $pgname         = $self->name;

    # Text used for the postmaster pid in messages and pattern expansion; the
    # node may not have a recorded pid when the caller passes explicit pids.
    my $pmpid_text = defined($postmaster_pid) ? $postmaster_pid : 'unknown';

    # Globals
    # TODO make these conditional on an expression to filter them.
    # TODO handle statics that vary across files
    # TODO add typecasts for when we don't have debuginfo
    # TODO useful GUCs
    #
    my @print_exprs = (
        # All backends
        'IsPostmasterEnvironment',
        'IsUnderPostmaster',
        'PostmasterPid',
        'LocalRecoveryInProgress',
        '*MyProc',
        'MyAuxProcType',
        '*XLogCtl',
        '*ControlFile',
        # Generic signal handling
        'InterruptPending',
        'ProcDiePending',
        'ShutdownRequestPending',
        'ConfigReloadPending',
        # user backend / postgres
        'xact_started',
        'doing_extended_query_message',
        'ignore_till_sync',
        # startup process
        'ThisTimeLineID',
        'LastRec',
        'ArchiveRecoveryRequested',
        'InArchiveRecovery',
        'PrimaryConnInfo',
        'PrimarySlotName',
        'StandbyMode',
        # autovac
        'am_autovacuum_launcher',
        'am_autovacuum_worker',
        'got_SIGHUP',
        'got_SIGUSR2',
        'got_SIGTERM',
        "'autovacuum.c':got_SIGTERM",
        # for walsenders
        'am_walsender',
        'am_cascading_walsender',
        'am_db_walsender',
        '*MyWalSnd',
        '*xlogreader',
        'sendTimeLine',
        'sentPtr',
        'streamingDoneSending',
        'streamingDoneReceiving',
        "'walsender.c':got_SIGTERM",
        'got_STOPPING',
        'got_SIGUSR2',
        'replication_active',
        '*logical_decoding_ctx',
        'logical_startptr',
        # walreceiver
        'recvFileTLI',
        '*wrconn',
        # checkpointer
        '*CheckpointerShmem',
        'last_checkpoint_time',
        'ckpt_active',
        # for bgworkers
        'IsBackgroundWorker',
        # for pgl backends
        '*MyPGLogicalWorker',
        '*MyPGLSubscription',
        # for bdr backends
        '*MyBdrSubscription',
        # postmaster
        'pmState',
    );

    # Add your own print expressions by passing print_exprs => ['var1', 'var2']
    push @print_exprs, @{ $kwargs{print_exprs} }
      if (defined($kwargs{print_exprs}));

    my @pids;
    if (defined($kwargs{pids})) {
        if (ref($kwargs{pids}) eq 'ARRAY') {
            # arrayref pid-list
            @pids = @{ $kwargs{pids} };
        } elsif (ref($kwargs{pids}) eq '') {
            # Scalar pid-list. Split on runs of whitespace and drop anything
            # that is not a pid, so "\r\n" line endings, blank lines and
            # leading/trailing whitespace never yield an empty "attach".
            @pids = grep { /\A\d+\z/ } split(/\s+/, $kwargs{pids});
        } else {
            die("keyword argument 'pids' must be undef, an arrayref, or a scalar string of pids");
        }
    } else {
        # Probe all children. Default if no pid list passed.
        #
        # We can't rely on querying the db because it might be shutting down so we don't
        # want to use pg_stat_activity and pg_stat_replication. Use the postmaster pid
        # instead, with pgrep.
        if (!defined($postmaster_pid)) {
            print("No postmaster pid known for node $pgname; cannot probe for child processes.\n");
            return;
        }

        my ($stdout, $stderr) = ('', '');
        IPC::Run::run(['pgrep', '--parent', $postmaster_pid],
            '>', \$stdout, '2>', \$stderr);
        print("raw pid list: $stdout\n");
        @pids = grep { /\A\d+\z/ } split(/\s+/, $stdout);
        if (scalar(@pids) == 0) {
            print("Failed to find child processes for pid $postmaster_pid. pgrep produced stdout \"$stdout\" and stderr \"$stderr\".\n");
            return;
        }
        # Include postmaster itself in the list
        push @pids, $postmaster_pid;
    }

    print("getting backtraces of children of postmaster $pmpid_text for node $pgname: @pids\n");

    # gdb gets the backtrace budget, plus the core budget when it also has to
    # write a core file.
    my $timeout_s = $kwargs{backtrace_timeout_s};
    $timeout_s += $kwargs{core_timeout_s} if ($kwargs{want_cores});

    foreach my $pid (@pids) {
        my $core_path = $kwargs{core_name_pattern};
        $core_path =~ s/\{\{pmpid\}\}/$pmpid_text/g;
        $core_path =~ s/\{\{pid\}\}/$pid/g;
        $core_path =~ s/\{\{name\}\}/$pgname/g;
        # Only create the core directory when a core will actually be written.
        make_path(dirname($core_path)) if ($kwargs{want_cores});

        my $gdb_logfile = $kwargs{gdb_logfile_pattern};
        $gdb_logfile =~ s/\{\{pmpid\}\}/$pmpid_text/g;
        $gdb_logfile =~ s/\{\{pid\}\}/$pid/g;
        $gdb_logfile =~ s/\{\{name\}\}/$pgname/g;
        my $have_logfile = ($gdb_logfile ne '');
        make_path(dirname($gdb_logfile)) if ($have_logfile);

        # The gdb script, one command per element. Single-quoted entries are
        # passed through verbatim, so "\n" and "$_exitsignal" reach gdb as
        # written rather than being interpreted by perl.
        my @gdb_script = (
            'set prompt',
            'set style enabled off',
            'set pagination off',
            'set print pretty on',
            'set print max-depth 20',
            'set print frame-arguments all',
            'set print frame-info source-and-location',
            'set print entry-values if-needed',
            'set print symbol-filename on',
            'set print symbol-loading full',
            'set print type typedefs on',
            'set print symbol on',
            'set print array on',
            'set print array-indexes on',
            'set print elements 100',
            'set print null-stop on',
            "attach $pid",
            'info proc',
            'if ($_exitsignal)',
            '  printf "Exited with signal: %d\n", $_exitsignal',
            'end',
            'printf "application_name = %s\n", application_name',
            'printf "debug_query_string = %s\n", debug_query_string',
            'echo \nbacktrace (short):\n',
            'bt',
            'echo \nbacktrace (extended):\n',
            'thread apply all bt full',
            'printf "\n\nEXPRESSIONS:\n"',
        );

        for my $print_expr (@print_exprs) {
            # This prints annoying $nn convenience variable labels, but
            # there's not much to be done about that.
            # NOTE(review): the expression is embedded in a gdb printf format
            # string, so an expression containing '%' or '"' would confuse it.
            push @gdb_script, qq[printf "$print_expr: "], "p $print_expr";
        }

        if ($kwargs{want_cores}) {
            push @gdb_script, 'printf "\n\n"', "gcore $core_path";
        }

        push @gdb_script, 'quit 0';

        my $gdbcmds = join("\n", @gdb_script) . "\n";

        # TODO: recursively expand some of the target vars
        #
        # Hint: if you want to run REALLY REALLY SLOW you can also get state of
        # globals. We should possibly do this but it's not easy to filter out the
        # libc stuff etc, and requires some back-and-forth with gdb.
        #

        my @log_arg = ();
        if ($have_logfile) {
            print("Writing gdb log to ${gdb_logfile}\n");
            @log_arg = ('&>', $gdb_logfile);
        }

        my $timeout = IPC::Run::timeout($timeout_s);

        print("--BACKTRACE-START-- $pid\n") unless ($have_logfile);
        my $h = IPC::Run::start(['gdb'], '<', \$gdbcmds, @log_arg, $timeout);

        # An expired timer surfaces as an exception from pump/finish. Catch it
        # so one stuck process neither aborts the remaining pids nor leaves a
        # gdb attached to (and holding stopped) the target.
        my $completed = eval {
            do { $h->pump; } while $h->pumpable;
            $h->finish;
            1;
        };
        if (!$completed) {
            my $err = $@ || 'unknown error';
            chomp($err);
            print("gdb failed or timed out after ${timeout_s}s for pid $pid: $err\n");
            eval { $h->kill_kill; };
            print("--BACKTRACE-END-- $pid\n") unless ($have_logfile);
            next;
        }
        print("--BACKTRACE-END-- $pid\n") unless ($have_logfile);

        my $rc = $h->result(0);
        print("gdb exited with $rc\n") unless ($rc == 0);
    }

    print("all backtraces and (if requested) cores have been collected.\n");
    return;
}