Re: [GENERAL] pg_upgrade from 9.5 to 9.6 fails with "invalid argument" - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: [GENERAL] pg_upgrade from 9.5 to 9.6 fails with "invalid argument" |
Date | |
Msg-id | 29413.1475253531@sss.pgh.pa.us Whole thread Raw |
In response to | Re: [GENERAL] pg_upgrade from 9.5 to 9.6 fails with "invalid argument" (Masahiko Sawada <sawada.mshk@gmail.com>) |
List | pgsql-hackers |
Masahiko Sawada <sawada.mshk@gmail.com> writes: > +#ifndef WIN32 > if ((src_fd = open(fromfile, O_RDONLY, 0)) < 0) > +#else > + if ((src_fd = open(fromfile, O_RDONLY | O_BINARY)) < 0) > +#endif This is easier with PG_BINARY. Also, both open() calls need this. I'm rather inclined to also stick PG_BINARY into the open() calls in copy_file() as well. That function isn't actively broken since it's not used under Windows, but I think it's highly likely that the current bug arose from copying-and-pasting that code, so leaving it in a state that's unsafe for Windows is just asking for trouble. Attached is the patch I'm currently working with to fix all three of the portability issues here (PG_BINARY, endianness, alignment, and there's some cosmetic changes too). > + pg_log(PG_WARNING, > + "could not read expected bytes: read = %u, expected = %u\n", > + BLCKSZ, bytesRead); I think this is probably better done as part of a wholesale revision of the error reporting in this file. What I have in mind is to redefine copyFile, linkFile, and rewriteVisibilityMap as all being responsible for reporting their own errors and then exiting (so they can just return void). We'd need to pass in the schema name and table name so that they can include that context, so that the reports don't get any less complete than they were before, but that does not seem like a big downside. A variant would be to have them print the error messages but then return a bool success flag, leaving the caller to decide whether it's fatal or not. But that would be more complicated and I see no real reason to think pg_upgrade would ever need it. regards, tom lane diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c index b33f0b4..9ae92a9 100644 *** a/src/bin/pg_upgrade/file.c --- b/src/bin/pg_upgrade/file.c *************** *** 18,25 **** #include <sys/stat.h> #include <fcntl.h> - #define BITS_PER_HEAPBLOCK_OLD 1 - #ifndef WIN32 static int copy_file(const char *fromfile, const char *tofile); --- 18,23 ---- *************** copy_file(const char *srcfile, const cha *** 84,93 **** return -1; } ! if ((src_fd = open(srcfile, O_RDONLY, 0)) < 0) return -1; ! if ((dest_fd = open(dstfile, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0) { save_errno = errno; --- 82,92 ---- return -1; } ! if ((src_fd = open(srcfile, O_RDONLY | PG_BINARY, 0)) < 0) return -1; ! if ((dest_fd = open(dstfile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, ! S_IRUSR | S_IWUSR)) < 0) { save_errno = errno; *************** copy_file(const char *srcfile, const cha *** 153,183 **** * version, we could refuse to copy visibility maps from the old cluster * to the new cluster; the next VACUUM would recreate them, but at the * price of scanning the entire table. So, instead, we rewrite the old ! * visibility maps in the new format. That way, the all-visible bit ! * remains set for the pages for which it was set previously. The ! * all-frozen bit is never set by this conversion; we leave that to ! * VACUUM. */ const char * rewriteVisibilityMap(const char *fromfile, const char *tofile) { ! int src_fd = 0; ! int dst_fd = 0; ! char buffer[BLCKSZ]; ! ssize_t bytesRead; ssize_t totalBytesRead = 0; ssize_t src_filesize; int rewriteVmBytesPerPage; BlockNumber new_blkno = 0; struct stat statbuf; ! /* Compute we need how many old page bytes to rewrite a new page */ rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2; if ((fromfile == NULL) || (tofile == NULL)) return "Invalid old file or new file"; ! if ((src_fd = open(fromfile, O_RDONLY, 0)) < 0) return getErrorText(); if (fstat(src_fd, &statbuf) != 0) --- 152,181 ---- * version, we could refuse to copy visibility maps from the old cluster * to the new cluster; the next VACUUM would recreate them, but at the * price of scanning the entire table. So, instead, we rewrite the old ! * visibility maps in the new format. That way, the all-visible bits ! * remain set for the pages for which they were set previously. The ! * all-frozen bits are never set by this conversion; we leave that to VACUUM. */ const char * rewriteVisibilityMap(const char *fromfile, const char *tofile) { ! int src_fd; ! int dst_fd; ! char *buffer; ! char *new_vmbuf; ssize_t totalBytesRead = 0; ssize_t src_filesize; int rewriteVmBytesPerPage; BlockNumber new_blkno = 0; struct stat statbuf; ! /* Compute number of old-format bytes per new page */ rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2; if ((fromfile == NULL) || (tofile == NULL)) return "Invalid old file or new file"; ! if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0) return getErrorText(); if (fstat(src_fd, &statbuf) != 0) *************** rewriteVisibilityMap(const char *fromfil *** 186,192 **** return getErrorText(); } ! if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0) { close(src_fd); return getErrorText(); --- 184,191 ---- return getErrorText(); } ! if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, ! S_IRUSR | S_IWUSR)) < 0) { close(src_fd); return getErrorText(); *************** rewriteVisibilityMap(const char *fromfil *** 196,208 **** src_filesize = statbuf.st_size; /* * Turn each visibility map page into 2 pages one by one. Each new page ! * has the same page header as the old one. If the last section of last ! * page is empty, we skip it, mostly to avoid turning one-page visibility ! * maps for small relations into two pages needlessly. */ while (totalBytesRead < src_filesize) { char *old_cur; char *old_break; char *old_blkend; --- 195,215 ---- src_filesize = statbuf.st_size; /* + * Malloc the work buffers, rather than making them local arrays, to + * ensure adequate alignment. + */ + buffer = (char *) pg_malloc(BLCKSZ); + new_vmbuf = (char *) pg_malloc(BLCKSZ); + + /* * Turn each visibility map page into 2 pages one by one. Each new page ! * has the same page header as the old one. If the last section of the ! * last page is empty, we skip it, mostly to avoid turning one-page ! * visibility maps for small relations into two pages needlessly. */ while (totalBytesRead < src_filesize) { + ssize_t bytesRead; char *old_cur; char *old_break; char *old_blkend; *************** rewriteVisibilityMap(const char *fromfil *** 225,285 **** /* * These old_* variables point to old visibility map page. old_cur * points to current position on old page. old_blkend points to end of ! * old block. old_break points to old page break position for ! * rewriting a new page. After wrote a new page, old_break proceeds ! * rewriteVmBytesPerPage bytes. */ old_cur = buffer + SizeOfPageHeaderData; old_blkend = buffer + bytesRead; old_break = old_cur + rewriteVmBytesPerPage; ! while (old_blkend >= old_break) { ! char new_vmbuf[BLCKSZ]; ! char *new_cur = new_vmbuf; bool empty = true; bool old_lastpart; ! /* Copy page header in advance */ memcpy(new_vmbuf, &pageheader, SizeOfPageHeaderData); ! /* Rewrite the last part of the old page? */ ! old_lastpart = old_lastblk && (old_blkend == old_break); ! new_cur += SizeOfPageHeaderData; /* Process old page bytes one by one, and turn it into new page. */ ! while (old_break > old_cur) { uint16 new_vmbits = 0; int i; /* Generate new format bits while keeping old information */ for (i = 0; i < BITS_PER_BYTE; i++) { ! uint8 byte = *(uint8 *) old_cur; ! ! if (byte & (1 << (BITS_PER_HEAPBLOCK_OLD * i))) { empty = false; ! new_vmbits |= 1 << (BITS_PER_HEAPBLOCK * i); } } /* Copy new visibility map bit to new format page */ ! memcpy(new_cur, &new_vmbits, BITS_PER_HEAPBLOCK); ! old_cur += BITS_PER_HEAPBLOCK_OLD; new_cur += BITS_PER_HEAPBLOCK; } ! /* If the last part of the old page is empty, skip writing it */ if (old_lastpart && empty) break; ! /* Set new checksum for a visibility map page (if enabled) */ ! if (old_cluster.controldata.data_checksum_version != 0 && ! new_cluster.controldata.data_checksum_version != 0) ((PageHeader) new_vmbuf)->pd_checksum = pg_checksum_page(new_vmbuf, new_blkno); --- 232,290 ---- /* * These old_* variables point to old visibility map page. old_cur * points to current position on old page. old_blkend points to end of ! * old block. old_break is the end+1 position on the old page for the ! * data that will be transferred to the current new page. */ old_cur = buffer + SizeOfPageHeaderData; old_blkend = buffer + bytesRead; old_break = old_cur + rewriteVmBytesPerPage; ! while (old_break <= old_blkend) { ! char *new_cur; bool empty = true; bool old_lastpart; ! /* First, copy old page header to new page */ memcpy(new_vmbuf, &pageheader, SizeOfPageHeaderData); ! /* Rewriting the last part of the last old page? */ ! old_lastpart = old_lastblk && (old_break == old_blkend); ! new_cur = new_vmbuf + SizeOfPageHeaderData; /* Process old page bytes one by one, and turn it into new page. */ ! while (old_cur < old_break) { + uint8 byte = *(uint8 *) old_cur; uint16 new_vmbits = 0; int i; /* Generate new format bits while keeping old information */ for (i = 0; i < BITS_PER_BYTE; i++) { ! if (byte & (1 << i)) { empty = false; ! new_vmbits |= ! VISIBILITYMAP_ALL_VISIBLE << (BITS_PER_HEAPBLOCK * i); } } /* Copy new visibility map bit to new format page */ ! new_cur[0] = (char) (new_vmbits & 0xFF); ! new_cur[1] = (char) (new_vmbits >> 8); ! old_cur++; new_cur += BITS_PER_HEAPBLOCK; } ! /* If the last part of the last page is empty, skip writing it */ if (old_lastpart && empty) break; ! /* Set new checksum for visibility map page, if enabled */ ! if (new_cluster.controldata.data_checksum_version != 0) ((PageHeader) new_vmbuf)->pd_checksum = pg_checksum_page(new_vmbuf, new_blkno); *************** rewriteVisibilityMap(const char *fromfil *** 290,306 **** return getErrorText(); } old_break += rewriteVmBytesPerPage; new_blkno++; } } ! /* Close files */ close(dst_fd); close(src_fd); return NULL; - } void --- 295,313 ---- return getErrorText(); } + /* Advance for next new page */ old_break += rewriteVmBytesPerPage; new_blkno++; } } ! /* Clean up */ ! pg_free(buffer); ! pg_free(new_vmbuf); close(dst_fd); close(src_fd); return NULL; } void
pgsql-hackers by date: