From a10183edfaba64cdcb814c6c0a22834620e9b49b Mon Sep 17 00:00:00 2001
From: Victoria Shepard <5807469+demonolock@users.noreply.github.com>
Date: Mon, 24 Oct 2022 02:11:35 +0500
Subject: [PATCH] C4a pg dump masking option
---
doc/src/sgml/ref/pg_dump.sgml | 56 ++
src/bin/pg_dump/Makefile | 5 +-
src/bin/pg_dump/masking.c | 854 ++++++++++++++++++++++++++
src/bin/pg_dump/masking.h | 70 +++
src/bin/pg_dump/pg_dump.c | 122 +++-
src/bin/pg_dump/t/011_dump_masking.pl | 540 ++++++++++++++++
src/tools/msvc/Mkvcbuild.pm | 1 +
7 files changed, 1630 insertions(+), 18 deletions(-)
create mode 100644 src/bin/pg_dump/masking.c
create mode 100644 src/bin/pg_dump/masking.h
create mode 100644 src/bin/pg_dump/t/011_dump_masking.pl
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 8b9d9f4cad..d4045d8ca2 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -881,6 +881,62 @@ PostgreSQL documentation
+
+
+
+
+ Specify a filename from which to read masking patterns for using masking functions
+ to fields. You have ability to use user-defined functions from file, functions that
+ already defined in the cluster and default functions.
+
+ Default masking functions - are full masking according to the data types.
+ Returns 'XXXX' for string data types [text, varchar, character].
+ Returns 0 for numeric data types [int, numeric, real, smallint, bigint].
+ Returns '1900-01-01' for date and '1900-01-01 00:00:00' for timestamp.
+
+ Here is an example of masking pattern file:
+
+ masking_file.txt
+
+ Schema1 {
+ Table1 {
+ field11 : function_name11
+ , field12 : function_name12
+ , field13 : function_name13
+ }
+
+ Table2 {
+ field21 : function_name21
+ , field22 : function_name22
+ /* This function will be created by script from the path 'path_to_file_with_function'
+ and used for 'field23' */
+ , field23 : "path_to_file_with_function"
+ }
+ }
+
+ /* Functions inside this block will be used for all schemes */
+ default {
+ /* Functions inside this block will be used for all tables */
+ default {
+ /* Function 'for_all_fields' will be used for all fields
+ did not covered by exact functions */
+ default: for_all_fields,
+ field1: value1,
+ /* Using default masking function for field2 */
+ field2: default
+ }
+
+ /* Functions inside this block will be used for tables with name 'Table'
+ in the all schemes */
+ Table {
+ field : function_name
+ }
+ }
+
+
+
+
+
diff --git a/src/bin/pg_dump/Makefile b/src/bin/pg_dump/Makefile
index 9dc5a784dd..266ec4a417 100644
--- a/src/bin/pg_dump/Makefile
+++ b/src/bin/pg_dump/Makefile
@@ -26,6 +26,7 @@ OBJS = \
$(WIN32RES) \
compress_io.o \
dumputils.o \
+ masking.o \
parallel.o \
pg_backup_archiver.o \
pg_backup_custom.o \
@@ -43,8 +44,8 @@ pg_dump: pg_dump.o common.o pg_dump_sort.o $(OBJS) | submake-libpq submake-libpg
pg_restore: pg_restore.o $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils
$(CC) $(CFLAGS) pg_restore.o $(OBJS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
-pg_dumpall: pg_dumpall.o dumputils.o $(WIN32RES) | submake-libpq submake-libpgport submake-libpgfeutils
- $(CC) $(CFLAGS) pg_dumpall.o dumputils.o $(WIN32RES) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+pg_dumpall: pg_dumpall.o dumputils.o masking.o $(WIN32RES) | submake-libpq submake-libpgport submake-libpgfeutils
+ $(CC) $(CFLAGS) pg_dumpall.o dumputils.o masking.o $(WIN32RES) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
install: all installdirs
$(INSTALL_PROGRAM) pg_dump$(X) '$(DESTDIR)$(bindir)'/pg_dump$(X)
diff --git a/src/bin/pg_dump/masking.c b/src/bin/pg_dump/masking.c
new file mode 100644
index 0000000000..8ad084e2b6
--- /dev/null
+++ b/src/bin/pg_dump/masking.c
@@ -0,0 +1,854 @@
+/*-------------------------------------------------------------------------
+ *
+ * masking.c
+ *
+ * Data masking tool for pg_dump
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/bin/pg_dump/masking.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include
+#include "masking.h"
+#include "common/logging.h"
+
+#define REL_SIZE 65 /* Length of relation name - 63 bytes (symbols) + addition symbol for correct work with option --quote-all-identifiers*/
+#define DEFAULT_NAME "default"
+#define COL_WITH_FUNC_SIZE 3 * REL_SIZE + 3 /* schema_name.function name + '(' + column_name + ') */
+
+char REL_SEP = '.'; /* Relation separator */
+
+void concatFunctionAndColumn(char *col_with_func, char *schema_name, char *column_name, char *function_name);
+extern void extractFuncNameIfPath(char *func_path, SimpleStringList *masking_func_query_path);
+int extractFunctionNameFromQueryFile(char *filename, char *func_name);
+char *getFullRelName(char *schema_name, char *table_name, char *column_name);
+int getMapIndexByKey(MaskingMap *map, char *key);
+bool isTerminal(char c);
+bool isSpace(char c);
+void printParsingError(struct MaskingDebugDetails *md, char *message, char current_symbol);
+char readName(char *rel_name, char c, struct MaskingDebugDetails *md, FILE *fin, int size);
+char readNextSymbol(struct MaskingDebugDetails *md, FILE *fin);
+void removeQuotes(char *func_name);
+char *readWord(FILE *fin, char *word);
+void setMapValue(MaskingMap *map, char *key, char *value);
+char skipMultiLineComment(char c, struct MaskingDebugDetails *md, FILE *fin);
+char skipOneLineComment(char c, struct MaskingDebugDetails *md, FILE *fin);
+
+/* Initialise masking map */
+MaskingMap *
+newMaskingMap()
+{
+ MaskingMap *map = malloc(sizeof(MaskingMap));
+ map->size = 0;
+ map->capacity = 8;
+ map->data = malloc(sizeof(Pair) * map->capacity);
+ memset(map->data, 0, sizeof(Pair) * map->capacity);
+ return map;
+}
+
+int
+getMapIndexByKey(MaskingMap *map, char *key)
+{
+ int index = 0;
+ while (map->data[index] != NULL)
+ {
+ if (strcmp(map->data[index]->key, key) == 0)
+ {
+ return index;
+ }
+ index++;
+ }
+ return -1;
+}
+
+/*
+ * Add value to map or rewrite, if key already exists
+ */
+void
+setMapValue(MaskingMap *map, char *key, char *value)
+{
+ if (key != NULL)
+ {
+ int index = getMapIndexByKey(map, key);
+ if (index != -1) /* Already have key in map */
+ {
+ free(map->data[index]->value);
+ map->data[index]->value = malloc(strlen(value) + 1);
+ strcpy(map->data[index]->value, value);
+ }
+ else
+ {
+ Pair *pair = malloc(sizeof(Pair));
+ pair->key = malloc(strlen(key) + 1);
+ pair->value = malloc(strlen(value) + 1);
+ memset(pair->key, 0, strlen(key));
+ memset(pair->value, 0, strlen(value));
+ strcpy(pair->key, key);
+ strcpy(pair->value, value);
+
+ map->data[map->size] = malloc(sizeof(Pair));
+ *map->data[map->size] = *pair;
+ map->size++;
+ free(pair);
+ }
+ if (map->size == map->capacity) /* Increase capacity */
+ {
+ map->capacity *= 1.5;
+ map->data = realloc(map->data, sizeof(Pair) * map->capacity);
+ }
+ }
+ free(key);
+}
+
+void
+printParsingError(struct MaskingDebugDetails *md, char *message, char current_symbol)
+{
+ pg_log_error("Error position (symbol '%c'): line: %d pos: %d. %s\n", current_symbol, md->line_num, md->symbol_num,
+ message);
+}
+
+bool
+isTerminal(char c)
+{
+ return c == ':' || c == ',' || c == '{' || c == '}' || c == EOF;
+}
+
+bool
+isSpace(char c)
+{
+ return c == ' ' || c == '\t' || c == '\n' || c == EOF;
+}
+
+/* Read to the end of a comment */
+char
+skipOneLineComment(char c, struct MaskingDebugDetails *md, FILE *fin)
+{
+ while (md->is_comment)
+ {
+ c = readNextSymbol(md, fin);
+ switch (c)
+ {
+ case '\n':
+ md->is_comment = false; /* End of a one line comment */
+ c = readNextSymbol(md, fin);
+ break;
+ case EOF:
+ return c; /* Handling `EOF` outside the function */
+ default:
+ continue;
+ }
+ }
+ return c;
+}
+
+/* Read to the end of a comment */
+char
+skipMultiLineComment(char c, struct MaskingDebugDetails *md, FILE *fin)
+{
+ while (md->is_comment)
+ {
+ c = readNextSymbol(md, fin);
+ switch (c)
+ {
+ case '*':
+ c = readNextSymbol(md, fin);
+ if (c == '/')
+ {
+ md->is_comment = false; /* End of a multi line comment */
+ c = readNextSymbol(md, fin);
+ break;
+ }
+ continue;
+ case EOF:
+ return c; /* Handling `EOF` outside the function */
+ default:
+ continue;
+ }
+ }
+ return c;
+}
+
+/*
+ * Read symbol and change place of cursor in MaskingDebugDetails
+ * md->line_num - increasing when we meet '\n'
+ * md->symbol_num - increasing after reading any symbol and reset
+ * when we meet '\n'
+ */
+char
+readNextSymbol(struct MaskingDebugDetails *md, FILE *fin)
+{
+ char c = fgetc(fin);
+ /* Count lines and columns */
+ if (c == '\n')
+ {
+ md->line_num++;
+ md->symbol_num = 1;
+ }
+ else
+ {
+ md->symbol_num++;
+ }
+ /* Skip comment */
+ if (c == '/' && !md->is_comment) /* First slash */
+ {
+ char next_c = fgetc(fin);
+ fseek(fin, -1, SEEK_CUR); /* Returning on 1 symbol back for correct line counting */
+ if (next_c == '/')
+ {
+ md->is_comment = true;
+ c = skipOneLineComment(c, md, fin);
+ }
+ else if (next_c == '*')
+ {
+ md->is_comment = true;
+ c = skipMultiLineComment(c, md, fin);
+ }
+ }
+ return c;
+}
+
+/* Read relation name */
+char
+readName(char *rel_name, char c, struct MaskingDebugDetails *md, FILE *fin, int size)
+{
+ bool word_started = false;
+ bool word_finished = false;
+ memset(rel_name, 0, size);
+ while (!isTerminal(c))
+ {
+ switch (c)
+ {
+ case ' ':
+ case '\t':
+ case '\n':
+ if (word_started && !word_finished)
+ {
+ word_finished = true;
+ }
+ break; /* Skip space symbols */
+ case EOF:
+ return c; /* Handling `EOF` outside the function */
+
+ default:
+ if (word_finished)
+ {
+ printParsingError(md, "Syntax error. Relation name can't contain space symbols.", c);
+ return c;
+ }
+ word_started = true;
+ strncat(rel_name, &c, 1);
+ break;
+ }
+ c = readNextSymbol(md, fin);
+ }
+ return c;
+}
+
+/* Concat schema name, table name and column name */
+char *
+getFullRelName(char *schema_name, char *table_name, char *column_name)
+{
+ /* Schema.Table.Column */
+ return psprintf("%s%c%s%c%s", schema_name, REL_SEP, table_name, REL_SEP, column_name);
+}
+
+/**
+ * Parsing file with masking pattern
+ * ------------------------------------
+ * Schema1
+ * {
+ * Table1
+ * {
+ * column11 : function_name11
+ * , column12 : function_name12
+ * , column13 : function_name13
+ * }
+ *
+ * Table2
+ * {
+ * column21 : function_name21
+ * , column22 : function_name22
+ * , column23 : "path_to_file_with_function/masking.sql"
+ * // Function 'masking.sql' will be stored in `masking_func_query_path` list, and it will be
+ * // created by script from the path 'path_to_file_with_function'. See more `pg_dump.c:createMaskingFunctions`
+ * }
+ * }
+ *
+ *
+ * default // Functions inside this block will be used for all schemas
+ * {
+ *
+ * default // Functions inside this block will be used for all tables
+ * {
+ * default: for_all_columns, // This function will be used for all columns did not covered by exact functions
+ * column1: value1,
+ * column2: value2
+ * }
+ *
+ * Table // Functions inside this block will be used for tables with name 'Table' in the all schemas
+ * {
+ * column : function_name
+ * }
+ * }
+ */
+int
+readMaskingPatternFromFile(FILE *fin, MaskingMap *map, SimpleStringList *masking_func_query_path)
+{
+ int exit_status;
+ int brace_counter;
+ int close_brace_counter;
+ char *schema_name;
+ char *table_name;
+ char *column_name;
+ char *func_name;
+ bool skip_reading;
+ char c;
+
+ struct MaskingDebugDetails md;
+ md.line_num = 1;
+ md.symbol_num = 0;
+ md.is_comment = false;
+ md.parsing_state = SCHEMA_NAME;
+ exit_status = EXIT_SUCCESS;
+
+ schema_name = malloc(REL_SIZE);
+ table_name = malloc(REL_SIZE);
+ column_name = malloc(REL_SIZE);
+ func_name = malloc(PATH_MAX + 1); /* We can get function name or path to file with a creating function query */
+
+ brace_counter = 0;
+ close_brace_counter = 0;
+ skip_reading = false;
+
+ c = ' ';
+ while (c != EOF)
+ {
+ if (skip_reading)
+ {
+ skip_reading = false;
+ }
+ else if (!isTerminal(c))
+ {
+ c = readNextSymbol(&md, fin);
+ }
+ switch (md.parsing_state)
+ {
+ case SCHEMA_NAME:
+ c = readName(schema_name, c, &md, fin, REL_SIZE);
+ md.parsing_state = WAIT_OPEN_BRACE;
+ memset(table_name, 0, REL_SIZE);
+ break;
+
+ case TABLE_NAME:
+ c = readName(table_name, c, &md, fin, REL_SIZE);
+ md.parsing_state = WAIT_OPEN_BRACE;
+ break;
+
+ case COLUMN_NAME:
+ c = readName(column_name, c, &md, fin, REL_SIZE);
+ md.parsing_state = WAIT_COLON;
+ break;
+
+ case FUNCTION_NAME:
+ c = readName(func_name, c, &md, fin, PATH_MAX);
+ extractFuncNameIfPath(func_name, masking_func_query_path);
+ setMapValue(map, getFullRelName(schema_name, table_name, column_name), func_name);
+ md.parsing_state = WAIT_COMMA;
+ break;
+
+ case WAIT_COLON:
+ if (isSpace(c))
+ break;
+ if (c != ':')
+ {
+ printParsingError(&md, "Waiting symbol ':'", c);
+ exit_status = EXIT_FAILURE;
+ goto clear_resources;
+ }
+ md.parsing_state = FUNCTION_NAME;
+ c = readNextSymbol(&md, fin);
+ skip_reading = true;
+ break;
+
+ case WAIT_OPEN_BRACE:
+ if (isSpace(c))
+ break;
+ if (c == '}' && brace_counter > 0)
+ {
+ md.parsing_state = WAIT_CLOSE_BRACE;
+ break;
+ }
+ if (c != '{')
+ {
+ printParsingError(&md, "Waiting symbol '{'", c);
+ exit_status = EXIT_FAILURE;
+ goto clear_resources;
+ }
+ if (table_name[0] != '\0') /* we have already read table_name */
+ {
+ md.parsing_state = COLUMN_NAME;
+ }
+ else
+ {
+ md.parsing_state = TABLE_NAME;
+ }
+ c = readNextSymbol(&md, fin);
+ skip_reading = true;
+ brace_counter++;
+ break;
+
+ case WAIT_CLOSE_BRACE:
+ if (isSpace(c))
+ break;
+ if (c != '}')
+ {
+ printParsingError(&md, "Waiting symbol '}'", c);
+ exit_status = EXIT_FAILURE;
+ goto clear_resources;
+ }
+ md.parsing_state = TABLE_NAME;
+ c = readNextSymbol(&md, fin);
+ brace_counter--;
+ break;
+
+ case WAIT_COMMA:
+ if (isSpace(c))
+ break;
+ if (c == '}')
+ {
+ c = readNextSymbol(&md, fin);
+ skip_reading = true;
+ close_brace_counter++;
+ break;
+ }
+ if (c != ',' && !isTerminal(c)) /* Schema_name or Table_name */
+ {
+ if (close_brace_counter == 1)
+ {
+ md.parsing_state = TABLE_NAME;
+ }
+ else if (close_brace_counter == 2)
+ {
+ md.parsing_state = SCHEMA_NAME;
+ }
+ else
+ {
+ printParsingError(&md, "Too many symbols '}'", c);
+ exit_status = EXIT_FAILURE;
+ goto clear_resources;
+ }
+ skip_reading = true;
+ close_brace_counter = 0;
+ break;
+ }
+ else if (c != ',')
+ {
+ printParsingError(&md, "Waiting symbol ','", c);
+ exit_status = EXIT_FAILURE;
+ goto clear_resources;
+ }
+ md.parsing_state = COLUMN_NAME;
+ c = readNextSymbol(&md, fin);
+ skip_reading = true;
+ break;
+ }
+ }
+ clear_resources:
+ free(schema_name);
+ free(table_name);
+ free(column_name);
+ free(func_name);
+ return exit_status;
+}
+
+/* Creating string in format `schema_name.function name(column_name)` */
+void
+concatFunctionAndColumn(char *col_with_func, char *schema_name, char *column_name, char *function_name)
+{
+ /* Default function */
+ if (strcmp(function_name, DEFAULT_NAME)==0)
+ {
+ strcpy(col_with_func, psprintf("_masking_function.%s", function_name));
+ }
+ /* Function name already contains schema name. If not, then add the same schema */
+ else if (strrchr(function_name, '.') != NULL)
+ {
+ strcpy(col_with_func, function_name);
+ }
+ else
+ {
+ strcpy(col_with_func, psprintf("%s.%s", schema_name, function_name));
+ }
+ strcpy(col_with_func, psprintf("%s(%s)", col_with_func, column_name));
+}
+
+/*
+ * Wrapping columns with functions
+ * schema_name.function_name(schema_name.table_name.column_name)
+ */
+char *
+addFunctionToColumn(char *schema_name, char *table_name, char *column_name, MaskingMap *map)
+{
+ char *col_with_func;
+ /* Try to find for exact schema, table and column */
+ int index = getMapIndexByKey(map, getFullRelName(schema_name, table_name, column_name));
+ if (index == -1) /* If didn't find, try to find function, that used for all schemas */
+ {
+ /* Try to find for exact table and column [default.table.column] */
+ index = getMapIndexByKey(map, getFullRelName(DEFAULT_NAME, table_name, column_name));
+ if (index == -1) /* If didn't find, try to find function, that used for all schemas and all tables */
+ {
+ /* Try to find for exact column [default.default.column] */
+ index = getMapIndexByKey(map, getFullRelName(DEFAULT_NAME, DEFAULT_NAME, column_name));
+ if (index == -1) /* If didn't find, try to find function, that used for all schemas and all tables and all columns */
+ {
+ /* Try to find function that used for all columns in all schemas and tables [default.default.default] */
+ index = getMapIndexByKey(map, getFullRelName(DEFAULT_NAME, DEFAULT_NAME, DEFAULT_NAME));
+ }
+ }
+ }
+ col_with_func = malloc(COL_WITH_FUNC_SIZE);
+ memset(col_with_func, 0, COL_WITH_FUNC_SIZE);
+ if (index != -1)
+ {
+ char *function_name = map->data[index]->value;
+ concatFunctionAndColumn(col_with_func, schema_name, column_name, function_name);
+ }
+ return col_with_func;
+}
+
+/* Remove the first and the last symbol in func_name */
+void
+removeQuotes(char *func_name)
+{
+ char *new_func_name = malloc(PATH_MAX + 1);
+ strncpy(new_func_name, func_name + 1, strlen(func_name) - 2);
+ memset(func_name, 0, PATH_MAX);
+ strcpy(func_name, new_func_name);
+ free(new_func_name);
+}
+
+/* Read a word from a query */
+char *
+readWord(FILE *fin, char *word)
+{
+ char c;
+ memset(word, 0, strlen(word));
+ do
+ {
+ c = tolower(getc(fin));
+ if (isSpace(c) || c == '(') /* Space or open brace before function arguments */
+ {
+ if (word[0] == '\0') /* Spaces before the word */
+ continue;
+ else
+ break; /* Spaces after the word */
+ }
+ else
+ {
+ strncat(word, &c, 1);
+ }
+ } while (c != EOF);
+ return word;
+}
+
+/**
+ * Extract function name from query. During extracting we also check
+ * the query, but only the start of it. We expecting the pattern:
+ * `create [or replace] function {func_name}`
+ * If something is wrong we will not use function and leave
+ * the column without transforming.
+ *
+ * We don't check the full script because we are guessing that this script will be
+ * run by users who has access to run them and will not harm theirs own data
+ */
+int
+extractFunctionNameFromQueryFile(char *filename, char *func_name)
+{
+ FILE *fin;
+ char *word;
+
+ memset(func_name, 0, REL_SIZE);
+ fin = NULL;
+ if (filename[0] != '\0')
+ {
+ fin = fopen(filename, "r");
+ }
+ if (fin == NULL)
+ {
+ pg_log_warning("Problem with file \'%s\"", filename);
+ }
+ else
+ {
+ word = malloc(REL_SIZE);
+ memset(word, 0, REL_SIZE);
+ if (strcmp(readWord(fin, word), "create") == 0) /* reading 'create' */
+ {
+ if (strcmp(readWord(fin, word), "or") == 0) /* reading 'or' | 'function' */
+ {
+ if (strcmp(readWord(fin, word), "replace") != 0) /* reading 'replace' */
+ {
+ pg_log_warning("Keyword 'replace' was expected, but found '%s'. Check query for creating a function '%s'.\n",
+ word, filename);
+ goto free_resources;
+ }
+ else
+ {
+ readWord(fin, word); /* reading 'function' */
+ }
+ }
+ }
+ else
+ {
+ pg_log_warning("Keyword 'create' was expected, but found '%s'. Check query for creating a function '%s'.\n", word,
+ filename);
+ goto free_resources;
+ }
+ if (strcmp(word, "function") == 0)
+ {
+ strcpy(func_name, readWord(fin, word));
+ }
+ else
+ {
+ pg_log_warning("Keyword 'function' was expected, but found '%s'. Check query for creating a function '%s'.\n", word,
+ filename);
+ goto free_resources;
+ }
+ free_resources:
+ free(word);
+ fclose(fin);
+ }
+ return func_name[0] != '\0'; /* If we got a function name, then - return 0, else - return 1 */
+}
+
+/**
+ * If there is a path (the first symbol is a quote '"'), then store this path in masking_func_query_path
+ * and write to the first argument (func_path) name of the function from the query in the file
+ * If there is not a path - do nothing
+*/
+void
+extractFuncNameIfPath(char *func_path, SimpleStringList *masking_func_query_path)
+{
+ char *func_name;
+ if (func_path[0] == '"')
+ {
+ func_name = malloc(REL_SIZE);
+ removeQuotes(func_path);
+ if (extractFunctionNameFromQueryFile(func_path, func_name) != 0) /* Read function name from query and store in func_name */
+ {
+ if (!simple_string_list_member(masking_func_query_path, func_path))
+ {
+ simple_string_list_append(masking_func_query_path, func_path);
+ }
+ strcpy(func_path, func_name); /* Store func_name in func_path to throw it to upper function */
+ }
+ free(func_name);
+ }
+}
+
+/* Read whole script from the file `filename` */
+char *
+readQueryForCreatingFunction(char *filename)
+{
+ FILE *fin;
+ char *query;
+ long fsize;
+ query = malloc(sizeof(char));
+ memset(query, 0, sizeof(char));
+ fin = fopen(filename, "r");
+ if (fin != NULL)
+ {
+ fseek(fin, 0L, SEEK_END);
+ fsize = ftell(fin);
+ fseek(fin, 0L, SEEK_SET);
+
+ query = (char *) calloc(fsize + 1, sizeof(char));
+
+ fsize = (int) fread(query, sizeof(char), fsize, fin);
+ if (fsize==0)
+ {
+ pg_log_error("File is empty `%s`", filename);
+ }
+ fclose(fin);
+ }
+ return query;
+}
+
+void
+maskingColumns(char *schema_name, char *table_name, char* column_list, MaskingMap *masking_map, PQExpBuffer *q)
+{
+ char *current_column_name = strtok(column_list, " ,()");
+ char *masked_query = malloc(COL_WITH_FUNC_SIZE);
+ char *func_with_column = malloc(COL_WITH_FUNC_SIZE);
+
+ while (current_column_name != NULL)
+ {
+ func_with_column = addFunctionToColumn(schema_name, table_name, current_column_name, masking_map);
+ if (func_with_column[0] != '\0')
+ {
+ strcpy(masked_query, func_with_column);
+ }
+ else
+ {
+ strcpy(masked_query, current_column_name);
+ }
+ current_column_name = strtok(NULL, " ,()");
+ if (current_column_name != NULL)
+ strcat(masked_query, ",");
+ appendPQExpBufferStr(*q, masked_query);
+ }
+ free(masked_query);
+ free(func_with_column);
+}
+
+/**
+ * getMaskingPatternFromFile
+ *
+ * Parse the specified masking file with description of what we need to mask into masking_map
+ */
+int
+getMaskingPatternFromFile(const char *filename, MaskingMap *masking_map, SimpleStringList *masking_func_query_path)
+{
+ FILE *fin;
+ int exit_result;
+ if (filename[0]=='\0')
+ {
+ pg_log_error("--masking filename shouldn't be empty");
+ return 1;
+ }
+
+ fin = fopen(filename, "r");
+
+ if (fin == NULL)
+ {
+ pg_log_error("--masking couldn't open file `%s`", filename);
+ return 1;
+ }
+
+ exit_result = readMaskingPatternFromFile(fin, masking_map, masking_func_query_path);
+ fclose(fin);
+ return exit_result;
+}
+
+/**
+ * Default masking function
+ * Full masking according to the data types. Returns 'XXXX' for string data types [text, varchar, character].
+ * Returns 0 for numeric data types [int, numeric, real, smallint, bigint]
+ * Returns '1900-01-01' for date and '1900-01-01 00:00:00' for timestamp
+ */
+char *
+default_functions()
+{
+ return "CREATE SCHEMA IF NOT EXISTS _masking_function;\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in text, out text)\n"
+ " AS $$ SELECT 'XXXX' $$\n"
+ " LANGUAGE SQL;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in real, out real)\n"
+ " AS $$ SELECT 0 $$\n"
+ " LANGUAGE SQL;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in date, out date)\n"
+ " AS $$ SELECT DATE '1900-01-01' $$\n"
+ " LANGUAGE SQL;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in timestamp, out timestamp)\n"
+ " AS $$ SELECT TIMESTAMP '1900-01-01 00:00:00' $$\n"
+ " LANGUAGE SQL;\n"
+ " \n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in timestamptz, out timestamptz)\n"
+ " AS $$ SELECT TIMESTAMPTZ '1900-01-01 00:00:00-00' $$\n"
+ " LANGUAGE SQL;\n"
+ " \n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in time, out time)\n"
+ " AS $$ SELECT TIME '00:00:00' $$\n"
+ " LANGUAGE SQL;\n"
+ " \n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in timetz, out timetz)\n"
+ " AS $$ SELECT TIMETZ '00:00:00-00' $$\n"
+ " LANGUAGE SQL;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in interval, out interval)\n"
+ " AS $$ SELECT INTERVAL '1 year 2 months 3 days' $$\n"
+ " LANGUAGE SQL;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in box, out box)\n"
+ " AS $$ SELECT box(circle '((0,0),2.0)') $$\n"
+ " LANGUAGE SQL;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in circle, out circle)\n"
+ " AS $$ SELECT circle(point '(0,0)', 0) $$\n"
+ " LANGUAGE SQL;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in path, out path)\n"
+ " AS $$ SELECT '[ ( 0 , 1 ) , ( 1 , 2 ) ]'::path $$\n"
+ " LANGUAGE SQL;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in point, out point)\n"
+ " AS $$ SELECT '(0, 0)'::point $$\n"
+ " LANGUAGE SQL;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in polygon , out polygon)\n"
+ " AS $$ SELECT '( ( 0 , 0 ) , ( 0 , 0 ) )'::polygon $$\n"
+ " LANGUAGE SQL;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in bytea, out bytea)\n"
+ " AS $$ SELECT '\\000'::bytea $$\n"
+ " LANGUAGE sql;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in inet, out inet)\n"
+ " AS $$ SELECT '0.0.0.0'::inet $$\n"
+ " LANGUAGE sql;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in cidr, out cidr)\n"
+ " AS $$ SELECT '0.0.0.0'::cidr $$\n"
+ " LANGUAGE sql;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in macaddr, out macaddr)\n"
+ " AS $$ SELECT macaddr '0:0:0:0:0:ab' $$\n"
+ " LANGUAGE sql;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in json, out json)\n"
+ " AS $$ SELECT '{\"a\":\"foo\", \"b\":\"bar\"}'::json $$\n"
+ " LANGUAGE sql;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in jsonb, out jsonb)\n"
+ " AS $$ SELECT '{\"a\":1, \"b\":2}'::jsonb $$\n"
+ " LANGUAGE sql;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in line, out line)\n"
+ " AS $$ SELECT '{1,2,3}'::line $$\n"
+ " LANGUAGE sql;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in lseg, out lseg)\n"
+ " AS $$ SELECT '((0,0),(0,0))'::lseg $$\n"
+ " LANGUAGE sql;\n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in bit, out bit)\n"
+ " AS $$ SELECT '0'::bit $$\n"
+ " LANGUAGE sql; \n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in boolean, out boolean)\n"
+ " AS $$ SELECT true $$\n"
+ " LANGUAGE sql; \n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in money, out money)\n"
+ " AS $$ SELECT 0 $$\n"
+ " LANGUAGE sql; \n"
+ "\n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in pg_lsn, out pg_lsn)\n"
+ " AS $$ SELECT '0/0'::pg_lsn $$\n"
+ " LANGUAGE sql; \n"
+ " \n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in uuid, out uuid)\n"
+ " AS $$ SELECT '00000000-0000-0000-0000-000000000000'::uuid $$\n"
+ " LANGUAGE sql; \n"
+ " \n"
+ "CREATE OR REPLACE FUNCTION _masking_function.default(in tsvector, out tsvector)\n"
+ " AS $$ SELECT 'a:1'::tsvector $$\n"
+ " LANGUAGE sql; ";
+}
diff --git a/src/bin/pg_dump/masking.h b/src/bin/pg_dump/masking.h
new file mode 100644
index 0000000000..b879b4d41e
--- /dev/null
+++ b/src/bin/pg_dump/masking.h
@@ -0,0 +1,70 @@
+/*-------------------------------------------------------------------------
+ *
+ * masking.h
+ *
+ * Data masking tool for pg_dump
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/bin/pg_dump/masking.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef MASKING_H
+#define MASKING_H
+
+#include
+#include
+#include
+#include
+#include
+#include "fe_utils/option_utils.h"
+#include "fe_utils/simple_list.h"
+#include "dumputils.h"
+
+typedef struct _pair
+{
+ char *key;
+ char *value;
+} Pair;
+
+typedef struct MaskingMap
+{
+ Pair **data;
+ int size;
+ int capacity;
+} MaskingMap;
+
+enum
+ParsingState
+{
+ SCHEMA_NAME,
+ TABLE_NAME,
+ COLUMN_NAME,
+ FUNCTION_NAME,
+ WAIT_COLON,
+ WAIT_OPEN_BRACE,
+ WAIT_CLOSE_BRACE,
+ WAIT_COMMA
+};
+
+struct
+MaskingDebugDetails
+{
+ int line_num;
+ int symbol_num;
+ bool is_comment;
+ enum ParsingState parsing_state;
+};
+
+char *addFunctionToColumn(char *schema_name, char *table_name, char *column_name, MaskingMap *map);
+char *default_functions(void);
+int getMaskingPatternFromFile(const char *filename, MaskingMap *masking_map, SimpleStringList *masking_func_query_path);
+void maskingColumns(char *schema_name, char *table_name, char* column_list, MaskingMap *masking_map, PQExpBuffer *q);
+MaskingMap *newMaskingMap(void);
+extern int readMaskingPatternFromFile(FILE *fin, MaskingMap *map, SimpleStringList *masking_func_query_path);
+char *readQueryForCreatingFunction(char *filename);
+
+#endif /* MASKING_H */
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index bd9b066e4e..a857f91cf9 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -59,8 +59,10 @@
#include "dumputils.h"
#include "fe_utils/option_utils.h"
#include "fe_utils/string_utils.h"
+#include "fe_utils/query_utils.h"
#include "getopt_long.h"
#include "libpq/libpq-fs.h"
+#include "masking.h"
#include "parallel.h"
#include "pg_backup_db.h"
#include "pg_backup_utils.h"
@@ -127,6 +129,9 @@ static SimpleOidList foreign_servers_include_oids = {NULL, NULL};
static SimpleStringList extension_include_patterns = {NULL, NULL};
static SimpleOidList extension_include_oids = {NULL, NULL};
+/* List of path to query with masking functions, that must be created before starting dump */
+static SimpleStringList masking_func_query_path = {NULL, NULL};
+static MaskingMap *masking_map; /* Map of columns and functions for data masking */
static const CatalogId nilCatalogId = {0, 0};
@@ -318,7 +323,7 @@ static void appendReloptionsArrayAH(PQExpBuffer buffer, const char *reloptions,
static char *get_synchronized_snapshot(Archive *fout);
static void setupDumpWorker(Archive *AH);
static TableInfo *getRootTableInfo(const TableInfo *tbinfo);
-
+static int createMaskingFunctions(Archive *AH, SimpleStringList *masking_func_query_path);
int
main(int argc, char **argv)
@@ -397,7 +402,8 @@ main(int argc, char **argv)
{"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1},
{"quote-all-identifiers", no_argument, "e_all_identifiers, 1},
{"load-via-partition-root", no_argument, &dopt.load_via_partition_root, 1},
- {"role", required_argument, NULL, 3},
+ {"masking", required_argument, NULL, 13},
+ {"role", required_argument, NULL, 3},
{"section", required_argument, NULL, 5},
{"serializable-deferrable", no_argument, &dopt.serializable_deferrable, 1},
{"snapshot", required_argument, NULL, 6},
@@ -414,7 +420,7 @@ main(int argc, char **argv)
{"rows-per-insert", required_argument, NULL, 10},
{"include-foreign-data", required_argument, NULL, 11},
- {NULL, 0, NULL, 0}
+ {NULL, 0, NULL, 0}
};
pg_logging_init(argv[0]);
@@ -623,6 +629,13 @@ main(int argc, char **argv)
optarg);
break;
+ case 13: /* masking */
+ masking_map = newMaskingMap();
+ /* If reading of masking patterns was unsuccessful, then exit */
+ if (getMaskingPatternFromFile(optarg, masking_map, &masking_func_query_path) != 0)
+ exit_nicely(1);
+ break;
+
default:
/* getopt_long already emitted a complaint */
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -743,6 +756,10 @@ main(int argc, char **argv)
* death.
*/
ConnectDatabase(fout, &dopt.cparams, false);
+ if (masking_map) /* If run with --masking option */
+ {
+ createMaskingFunctions(fout, &masking_func_query_path);
+ }
setup_connection(fout, dumpencoding, dumpsnapshot, use_role);
/*
@@ -1035,6 +1052,7 @@ help(const char *progname)
printf(_(" --inserts dump data as INSERT commands, rather than COPY\n"));
printf(_(" --load-via-partition-root load partitions via the root table\n"));
printf(_(" --no-comments do not dump comments\n"));
+ printf(_(" --masking data masking, helps with hiding sensitive data\n"));
printf(_(" --no-publications do not dump publications\n"));
printf(_(" --no-security-labels do not dump security label assignments\n"));
printf(_(" --no-subscriptions do not dump subscriptions\n"));
@@ -1991,17 +2009,25 @@ dumpTableData_copy(Archive *fout, const void *dcontext)
/*
* Use COPY (SELECT ...) TO when dumping a foreign table's data, and when
- * a filter condition was specified. For other cases a simple COPY
- * suffices.
+ * a filter (tdinfo->filtercond) or masking (masking_map) condition was specified.
+ * For other cases a simple COPY suffices.
*/
- if (tdinfo->filtercond || tbinfo->relkind == RELKIND_FOREIGN_TABLE)
+ if (tdinfo->filtercond || tbinfo->relkind == RELKIND_FOREIGN_TABLE || masking_map)
{
appendPQExpBufferStr(q, "COPY (SELECT ");
/* klugery to get rid of parens in column list */
if (strlen(column_list) > 2)
{
- appendPQExpBufferStr(q, column_list + 1);
- q->data[q->len - 1] = ' ';
+ if (masking_map) /* If run with --masking option */
+ {
+ maskingColumns(tbinfo->dobj.namespace->dobj.name, tbinfo->dobj.name, pg_strdup(column_list), masking_map, &q);
+ appendPQExpBufferStr(q, " ");
+ }
+ else
+ {
+ appendPQExpBufferStr(q, column_list + 1);
+ q->data[q->len - 1] = ' ';
+ }
}
else
appendPQExpBufferStr(q, "* ");
@@ -2010,13 +2036,13 @@ dumpTableData_copy(Archive *fout, const void *dcontext)
fmtQualifiedDumpable(tbinfo),
tdinfo->filtercond ? tdinfo->filtercond : "");
}
- else
- {
- appendPQExpBuffer(q, "COPY %s %s TO stdout;",
- fmtQualifiedDumpable(tbinfo),
- column_list);
- }
- res = ExecuteSqlQuery(fout, q->data, PGRES_COPY_OUT);
+ else
+ {
+ appendPQExpBuffer(q, "COPY %s %s TO stdout;",
+ fmtQualifiedDumpable(tbinfo),
+ column_list);
+ }
+ res = ExecuteSqlQuery(fout, q->data, PGRES_COPY_OUT);
PQclear(res);
destroyPQExpBuffer(clistBuf);
@@ -2153,8 +2179,25 @@ dumpTableData_insert(Archive *fout, const void *dcontext)
appendPQExpBufferStr(q, ", ");
if (tbinfo->attgenerated[i])
appendPQExpBufferStr(q, "NULL");
+ else if (masking_map) /* If run with --masking option */
+ {
+ char *column_with_fun;
+ column_with_fun=addFunctionToColumn(tbinfo->dobj.namespace->dobj.name, tbinfo->dobj.name,
+ tbinfo->attnames[i], masking_map);
+
+ if (column_with_fun[0] != '\0')
+ {
+ appendPQExpBufferStr(q, column_with_fun);
+ }
+ else
+ {
+ appendPQExpBufferStr(q, fmtId(tbinfo->attnames[i]));
+ }
+ free(column_with_fun);
+ }
else
- appendPQExpBufferStr(q, fmtId(tbinfo->attnames[i]));
+ appendPQExpBufferStr(q, fmtId(tbinfo->attnames[i]));
+
attgenerated[nfields] = tbinfo->attgenerated[i];
nfields++;
}
@@ -18198,3 +18241,50 @@ appendReloptionsArrayAH(PQExpBuffer buffer, const char *reloptions,
if (!res)
pg_log_warning("could not parse %s array", "reloptions");
}
+
+/**
+ * Read paths to functions from `masking_func_query_path`,
+ * read query inside the files and run them. We checked them
+ * in function masking.c:extractFunctionNameFromQueryFile.
+ */
+int
+createMaskingFunctions(Archive *AH, SimpleStringList *masking_func_query_path)
+{
+ int exit_result;
+ PGconn *conn = GetConnection(AH);
+ char *filename;
+ char *query;
+ bool result;
+
+ exit_result=0;
+ result = false;
+ /* Read all custom masking functions and create them */
+ for (SimpleStringListCell *cell = masking_func_query_path->head; cell; cell = cell->next)
+ {
+ filename=cell->val;
+ query = readQueryForCreatingFunction(filename);
+ if (query[0]=='\0')
+ {
+ pg_log_warning("Query is empty. Check file `%s`", filename);
+ exit_result++;
+ }
+ else
+ {
+ result = executeMaintenanceCommand(conn, query, true);
+ }
+
+ if (!result)
+ {
+ pg_log_warning("Failed execution of query from file \"%s\"", filename);
+ }
+ free(query);
+ }
+ /* Read all default functions and create them */
+ result = executeMaintenanceCommand(conn, default_functions(), true);
+ if (!result)
+ {
+ pg_log_warning("Problem during creating default functions from method `masking.c:default_functions`");
+ exit_result++;
+ }
+ return exit_result;
+}
diff --git a/src/bin/pg_dump/t/011_dump_masking.pl b/src/bin/pg_dump/t/011_dump_masking.pl
new file mode 100644
index 0000000000..6416f438c3
--- /dev/null
+++ b/src/bin/pg_dump/t/011_dump_masking.pl
@@ -0,0 +1,540 @@
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More tests => 84;
+
+my $tempdir = PostgreSQL::Test::Utils::tempdir;
+my $inputfile;
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+my $port = $node->port;
+my $backupdir = $node->backup_dir;
+my $plainfile = "$backupdir/plain_copy.sql";
+my $plainfile_insert = "$backupdir/plain_insert.sql";
+my $testdumo = "$backupdir/testdumo.sql";
+my $dumpfile = "$backupdir/options_plain.sql";
+my $dumpdir = "$backupdir/parallel";
+my $dumpjobfile = "$backupdir/parallel/toc.dat'";
+
+$node->init;
+$node->start;
+
+# Generate test objects
+$node->safe_psql('postgres', 'CREATE FOREIGN DATA WRAPPER dummy;');
+$node->safe_psql('postgres', 'CREATE SERVER dummyserver FOREIGN DATA WRAPPER dummy;');
+
+$node->safe_psql('postgres', "CREATE SCHEMA schema1;");
+$node->safe_psql('postgres', "CREATE SCHEMA schema2;");
+$node->safe_psql('postgres', "CREATE SCHEMA schema3;");
+$node->safe_psql('postgres', "CREATE SCHEMA schema4;");
+$node->safe_psql('postgres', "CREATE SCHEMA schema5;");
+$node->safe_psql('postgres', "CREATE SCHEMA schema6;");
+$node->safe_psql('postgres', "CREATE SCHEMA schema7;");
+$node->safe_psql('postgres', "CREATE SCHEMA large_schema_name1234567890123456789012345678901234567890123456;");
+
+$node->safe_psql('postgres', "CREATE TABLE schema1.table1(field1 varchar);");
+$node->safe_psql('postgres', "CREATE TABLE schema2.table2(field2 varchar);");
+$node->safe_psql('postgres', "CREATE TABLE schema3.table3(field3 varchar);");
+$node->safe_psql('postgres', "CREATE TABLE schema4.table4(field41 varchar, field42 varchar);");
+$node->safe_psql('postgres', "CREATE TABLE schema5.table51(field511 varchar, email varchar);");
+$node->safe_psql('postgres', "CREATE TABLE schema5.table52(email varchar, field522 varchar);");
+$node->safe_psql('postgres', "CREATE TABLE schema6.table61(email varchar, field612 varchar);");
+$node->safe_psql('postgres', "CREATE TABLE schema6.table62(field621 varchar, email varchar);");
+$node->safe_psql('postgres', "CREATE TABLE schema7.table7(field71 varchar, phone varchar);");
+$node->safe_psql('postgres', "CREATE TABLE schema7.table8(phone varchar, field82 varchar);");
+$node->safe_psql('postgres', "CREATE TABLE large_schema_name1234567890123456789012345678901234567890123456.large_table_name12345678901234567890123456789012345678901234567(large_field_1_1234567890123456789012345678901234567890123456789 varchar, large_field_2_1234567890123456789012345678901234567890123456789 varchar);");
+
+$node->safe_psql('postgres', "INSERT INTO schema1.table1 VALUES('value1');");
+$node->safe_psql('postgres', "INSERT INTO schema2.table2 VALUES('value2');");
+$node->safe_psql('postgres', "INSERT INTO schema3.table3 VALUES('value3');");
+$node->safe_psql('postgres', "INSERT INTO schema4.table4 VALUES('value41', 'value42');");
+$node->safe_psql('postgres', "INSERT INTO schema5.table51 VALUES('value511', 'value512');");
+$node->safe_psql('postgres', "INSERT INTO schema5.table52 VALUES('value521', 'value522');");
+$node->safe_psql('postgres', "INSERT INTO schema6.table61 VALUES('value611', 'value612');");
+$node->safe_psql('postgres', "INSERT INTO schema6.table62 VALUES('value621', 'value622');");
+$node->safe_psql('postgres', "INSERT INTO schema7.table7 VALUES('value71', 'value72');");
+$node->safe_psql('postgres', "INSERT INTO schema7.table8 VALUES('value81', 'value82');");
+$node->safe_psql('postgres', "INSERT INTO large_schema_name1234567890123456789012345678901234567890123456.large_table_name12345678901234567890123456789012345678901234567 VALUES('large_value_1_1234567890123456789012345678901234567890123456789', 'large_value_2');");
+
+
+#########################################
+# Use masking with custom function from file
+
+# Create files with custom function
+open $inputfile, '>>', "$tempdir/custom_function_file.txt"
+ or die "unable to open custom_function_file for writing";
+print $inputfile "
+ CREATE FUNCTION schema3.custom_function(in text, out text)
+ AS \$\$ SELECT \$1 || ' custom' \$\$
+ LANGUAGE SQL;";
+close $inputfile;
+
+open $inputfile, '>>', "$tempdir/mask_email.sql"
+ or die "unable to open mask_email.sql for writing";
+print $inputfile "
+ CREATE FUNCTION public.mask_email(in text, out text)
+ AS \$\$ SELECT \$1 || ' email' \$\$
+ LANGUAGE SQL;";
+close $inputfile;
+
+open $inputfile, '>>', "$tempdir/mask_phone.sql"
+ or die "unable to open mask_phone.sql for writing";
+print $inputfile "
+ CREATE FUNCTION public.mask_phone(in text, out text)
+ AS \$\$ SELECT \$1 || ' phone' \$\$
+ LANGUAGE SQL;";
+close $inputfile;
+
+# Create masking pattern file
+open $inputfile, '>>', "$tempdir/masking_file.txt"
+ or die "unable to open masking file for writing";
+print $inputfile "// First comment
+ schema1
+ {
+ table1 // Second comment
+ {
+ field1: default
+ }
+ }
+
+ /*
+ Third comment
+ */
+ schema2
+ {
+ table2 {
+ not_exist_field: default
+ }
+ }
+ /**
+ * Fourth multi line comment
+ */
+ schema3 /* Fifth multi line comment */
+ {
+ table3
+ {
+ field3: \"$tempdir/custom_function_file.txt\"//Sixth comment
+ }
+ }
+ schema4
+ {
+ table4
+ {
+ field41: schema3.custom_function,
+ field42: \"$tempdir/custom_function_file.txt\"
+ }
+ }
+ default
+ {
+ default
+ {
+ email: \"$tempdir/mask_email.sql\"
+ }
+ table7
+ {
+ phone: \"$tempdir/mask_phone.sql\"
+ }
+ }
+ large_schema_name1234567890123456789012345678901234567890123456
+ {
+ large_table_name12345678901234567890123456789012345678901234567
+ {
+ large_field_1_1234567890123456789012345678901234567890123456789: default,
+ large_field_2_1234567890123456789012345678901234567890123456789: schema3.custom_function
+ }
+ }
+ ";
+close $inputfile;
+
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $plainfile,
+ "--masking=$tempdir/masking_file.txt"
+ ],
+ "1. Run masking without options");
+
+my $dump = slurp_file($plainfile);
+ok($dump =~ qr/^COPY schema1\.table1 \(field1\) FROM stdin\;\nXXXX/m, "2. [Default function] Field1 was masked");
+ok($dump =~ qr/^COPY schema2\.table2 \(field2\) FROM stdin\;\nvalue2/m, "3. Field2 was not masked");
+ok($dump =~ qr/^COPY schema3\.table3 \(field3\) FROM stdin\;\nvalue3 custom/m, "4. [Function from file] Field3 was masked");
+ok($dump =~ qr/^COPY schema4\.table4 \(field41\, field42\) FROM stdin\;\nvalue41 custom value42 custom/m, "5. [Function from file] Already created custom function can be used second time");
+ok($dump =~ qr/^COPY schema5\.table51 \(field511\, email\) FROM stdin\;\nvalue511 value512 email/m, "6. [Default schema and table] Masked only field with name `email`");
+ok($dump =~ qr/^COPY schema5\.table52 \(email\, field522\) FROM stdin\;\nvalue521 email value522/m, "7. [Default schema and table] Masked only field with name `email`");
+ok($dump =~ qr/^COPY schema6\.table61 \(email\, field612\) FROM stdin\;\nvalue611 email value612/m, "8. [Default schema and table] Masked only field with name `email`");
+ok($dump =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m, "9. [Default schema and table] Masked only field with name `email`");
+ok($dump =~ qr/^COPY schema7\.table7 \(field71\, phone\) FROM stdin\;\nvalue71 value72 phone/m, "10. [Default schema] Masked only field with name `phone` from table7");
+ok($dump =~ qr/^COPY schema7\.table8 \(phone\, field82\) FROM stdin\;\nvalue81 value82/m, "11. [Default schema] Masked only field with name `phone` from table7");
+ok($dump =~ qr/^COPY large_schema_name1234567890123456789012345678901234567890123456\.large_table_name12345678901234567890123456789012345678901234567 \(large_field_1_1234567890123456789012345678901234567890123456789\, large_field_2_1234567890123456789012345678901234567890123456789\) FROM stdin\;\nXXXX large_value_2 custom/m,
+"12. [Large values] Limit of relation name size is 63 symbols");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $plainfile_insert,
+ "--masking=$tempdir/masking_file.txt",
+ "--inserts"
+ ],
+ "13. Run masking with option --inserts");
+
+my $dump = slurp_file($plainfile_insert);
+ok($dump =~ qr/^INSERT INTO schema1\.table1 VALUES \(\'XXXX\'\)/m, "14. [Default function] Field1 was masked");
+ok($dump =~ qr/^CREATE FUNCTION _masking_function\.\"default\"\(text\, OUT text\) RETURNS text/m, "15. [Default function] Default functions were created");
+ok($dump =~ qr/^INSERT INTO schema2\.table2 VALUES \(\'value2\'\)/m, "16. Field2 was not masked");
+ok($dump =~ qr/^CREATE FUNCTION schema3\.custom_function\(text\, OUT text\) RETURNS text/m, "17. [Function from file] Custom function was created");
+ok($dump =~ qr/^INSERT INTO schema3\.table3 VALUES \(\'value3 custom\'\)/m, "18. [Function from file] Field3 was masked");
+ok($dump =~ qr/^INSERT INTO schema4\.table4 VALUES \(\'value41 custom\'\, \'value42 custom\'\)/m, "19. [Function from file] Already created custom function can be used second time");
+ok($dump =~ qr/^INSERT INTO schema5\.table51 VALUES \(\'value511\'\, \'value512 email\'\)/m, "20. [Default schema and table] Masked only field with name `email`");
+ok($dump =~ qr/^INSERT INTO schema5\.table52 VALUES \(\'value521 email\'\, \'value522\'\)/m, "21. [Default schema and table] Masked only field with name `email`");
+ok($dump =~ qr/^INSERT INTO schema6\.table61 VALUES \(\'value611 email\'\, \'value612\'\)/m, "22. [Default schema and table] Masked only field with name `email`");
+ok($dump =~ qr/^INSERT INTO schema6\.table62 VALUES \(\'value621\'\, \'value622 email\'\)/m, "23. [Default schema and table] Masked only field with name `email`");
+ok($dump =~ qr/^INSERT INTO schema7\.table7 VALUES \(\'value71\'\, \'value72 phone\'\)/m, "24. [Default schema] Masked only field with name `phone` from table7");
+ok($dump =~ qr/^INSERT INTO schema7\.table8 VALUES \(\'value81\'\, \'value82\'\)/m, "25. [Default schema] Masked only field with name `phone` from table7");
+ok($dump =~ qr/^INSERT INTO large_schema_name1234567890123456789012345678901234567890123456\.large_table_name12345678901234567890123456789012345678901234567 VALUES \(\'XXXX\'\, \'large_value_2 custom\'\)/m,
+"26. [Large values] Limit of relation name size is 63 symbols");
+
+#########################################
+# Run masking with other options
+#########################################
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ '--data-only'
+ ],
+ "27. Run masking with option --data-only");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema7\.table7 \(field71\, phone\) FROM stdin\;\nvalue71 value72 phone/m,
+"28. Check dump after running masking with option --data-only");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ '--clean'
+ ],
+ "29. Run masking with option --clean");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema7\.table7 \(field71\, phone\) FROM stdin\;\nvalue71 value72 phone/m,
+"30. Check dump after running masking with option --clean");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ '--create'
+ ],
+ "31. Run masking with option --create");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema7\.table7 \(field71\, phone\) FROM stdin\;\nvalue71 value72 phone/m,
+"32. Check dump after running masking with option --create");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--encoding=UTF-8"
+ ],
+ "33. Run masking with option --encoding");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema7\.table7 \(field71\, phone\) FROM stdin\;\nvalue71 value72 phone/m,
+"34. Check dump after running masking with option --encoding");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--schema=schema6"
+ ],
+ "35. Run masking with option --schema");
+ok(slurp_file($dumpfile) !~ qr/^COPY schema7\.table7 \(field71\, phone\) FROM stdin\;\nvalue71 value72 phone/m,
+"36. Check dump after running masking with option --schema");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"37. Check dump after running masking with option --schema");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--table=schema6.table62"
+ ],
+ "38. Run masking with option --table");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"39. Check dump after running masking with option --table");
+ok(slurp_file($dumpfile) !~ qr/^COPY schema6\.table61 \(email\, field612\) FROM stdin\;\nvalue611 email value612/m,
+"40. Check dump after running masking with option --table");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--exclude-table=schema6.table62"
+ ],
+ "41. Run masking with option --exclude-table");
+ok(slurp_file($dumpfile) !~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"42. Check dump after running masking with option --exclude-table");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table61 \(email\, field612\) FROM stdin\;\nvalue611 email value612/m,
+"43. Check dump after running masking with option --exclude-table");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--load-via-partition-root"
+ ],
+ "44. Run masking with option --load-via-partition-root");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"45. Check dump after running masking with option --load-via-partition-root");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--lock-wait-timeout=10"
+ ],
+ "46. Run masking with option --lock-wait-timeout");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"47. Check dump after running masking with option --lock-wait-timeout");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--no-comments",
+ "--no-publications",
+ "--no-security-labels",
+ "--no-subscriptions",
+ "--no-sync",
+ "--no-tablespaces",
+ "--no-toast-compression",
+ "--no-unlogged-table-data"
+ ],
+ "48. Run masking with skip options");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"49. Check dump after running masking with skip options");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--quote-all-identifiers"
+ ],
+ "50. Run masking with option --quote-all-identifiers");
+ok(slurp_file($dumpfile) =~ qr/^COPY \"schema4\"\.\"table4\" \(\"field41\"\, \"field42\"\) FROM stdin\;\nvalue41 value42/m,
+"51. Check dump after running masking with option --quote-all-identifiers");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--rows-per-insert=10"
+ ],
+ "52. Run masking with option --rows-per-insert");
+ok(slurp_file($dumpfile) =~ qr/^INSERT INTO schema6\.table62 VALUES\n \(\'value621\'\, \'value622 email\'\)\;/m,
+"53. Check dump after running masking with option --rows-per-insert");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--section=pre-data"
+ ],
+ "54. Run masking with option --section");
+ok(slurp_file($dumpfile) =~ qr/^CREATE FUNCTION public\.mask_phone\(text\, OUT text\) RETURNS text/m,
+"55. Check dump after running masking with option --section");
+ok(slurp_file($dumpfile) !~ qr/^INSERT INTO schema7\.table7 VALUES \(\'value71\'\, \'value72 phone\'\)/m,
+"56. Check dump after running masking with option --section");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--serializable-deferrable"
+ ],
+ "57. Run masking with option --serializable-deferrable");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"58. Check dump after running masking with option --serializable-deferrable");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ '--strict-names', 'postgres'
+ ],
+ "59. Run masking with option --strict names");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"60. Check dump after running masking with option --serializable-deferrable");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ '--use-set-session-authorization'
+ ],
+ "61. Run masking with option --use-set-session-authorization");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"62. Check dump after running masking with option --use-set-session-authorization");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--compress=9"
+ ],
+ "63. Run masking with option --compress");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpdir,
+ "--masking=$tempdir/masking_file.txt",
+ "--format=directory",
+ "--jobs=2"
+ ],
+ "64. Run masking with option --jobs");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--no-privileges"
+ ],
+ "65. Run masking with option --no-privileges");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"66. Check dump after running masking with option --no-privileges");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--binary-upgrade"
+ ],
+ "67. Run masking with option --binary-upgrade");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"68. Check dump after running masking with option --binary-upgrade");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--column-inserts"
+ ],
+ "69. Run masking with option --column-inserts");
+ok(slurp_file($dumpfile) =~ qr/^INSERT INTO schema7\.table7 \(field71\, mask_phone\) VALUES \(\'value71\'\, \'value72 phone\'\)/m,
+"70. Check dump after running masking with option --column-inserts");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--disable-dollar-quoting"
+ ],
+ "71. Run masking with option --disable-dollar-quoting");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"72. Check dump after running masking with option --disable-dollar-quoting");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--disable-triggers",
+ "--masking=$tempdir/masking_file.txt"
+ ],
+ "73. Run masking with option --disable-triggers");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"74. Check dump after running masking with option --disable-triggers");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--if-exists",
+ "--clean",
+ ],
+ "75. Run masking with option --if-exists");
+ok(slurp_file($dumpfile) =~ qr/^COPY schema6\.table62 \(field621\, email\) FROM stdin\;\nvalue621 value622 email/m,
+"76. Check dump after running masking with option --if-exists");
+
+command_ok(
+ [
+ 'pg_dump', '-p', $port, '-f', $dumpfile,
+ "--masking=$tempdir/masking_file.txt",
+ "--verbose"
+ ],
+ "77. Run masking with option --verbose");
+
+#########################################
+# Negative cases
+#########################################
+open $inputfile, '>>', "$tempdir/drop_table_script.sql"
+ or die "unable to open mask_phone.sql for writing";
+print $inputfile "DROP TABLE schema1.table1;";
+close $inputfile;
+
+open $inputfile, '>>', "$tempdir/masking_file_2.txt"
+ or die "unable to open masking file for writing";
+print $inputfile "schema1
+ {
+ table1
+ {
+ field1: \"$tempdir/drop_table_script.sql\"
+ }
+ }
+ ";
+close $inputfile;
+
+
+command_fails_like(
+ [
+ 'pg_dump', '-p', $port, '-f', $plainfile,
+ "--masking=$tempdir/masking_file_2.txt"
+ ],
+ qr/pg_dump: warning: Keyword 'create' was expected, but found 'drop'. Check query for creating a function/,
+ "78, 79. Run masking with wrong query");
+
+open $inputfile, '>>', "$tempdir/masking_file_2.txt"
+ or die "unable to open masking file for writing";
+print $inputfile "schema1
+ {
+ table1
+ {
+ field,
+ }
+ }
+ ";
+close $inputfile;
+
+command_fails_like(
+ [
+ 'pg_dump', '-p', $port, '-f', $plainfile,
+ "--masking=$tempdir/masking_file_2.txt"
+ ],
+ qr/\Qpg_dump: error: Error position (symbol ','): line: 12 pos: 31. Waiting symbol ':'\E/,
+ "80, 81. Run masking with wrong masking file. Unexpected terminal symbol.");
+
+open $inputfile, '>>', "$tempdir/masking_file_3.txt"
+ or die "unable to open masking file for writing";
+print $inputfile
+"schema1
+{
+table1
+ {
+ field1: function ,
+ field2: wrong function
+ }
+}";
+
+command_fails_like(
+ [
+ 'pg_dump', '-p', $port, '-f', $plainfile,
+ "--masking=$tempdir/masking_file_3.txt"
+ ],
+ qr/\Qpg_dump: error: Error position (symbol 'f'): line: 6 pos: 24. Syntax error. Relation name can't contain space symbols.\E/,
+ "82, 83. Run masking with wrong masking file. Function name with space.");
+close $inputfile;
+
+command_fails_like(
+ [ 'pg_dump', '--masking', '' ],
+ qr/\Qpg_dump: error: --masking filename shouldn't be empty\E/,
+ '84. File parameter shouldn\'t be empty');
+
+done_testing();
+
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 83a3e40425..d606d77037 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -436,6 +436,7 @@ sub mkvcbuild
$pgdump->AddFile('src/bin/pg_dump/pg_dump.c');
$pgdump->AddFile('src/bin/pg_dump/common.c');
$pgdump->AddFile('src/bin/pg_dump/pg_dump_sort.c');
+ $pgdump->AddFile('src/bin/pg_dump/masking.c');
$pgdump->AddLibrary('ws2_32.lib');
my $pgdumpall = AddSimpleFrontend('pg_dump', 1);
--
2.25.1