Thread: test_fsync label adjustments
I have modified test_fsync to use test labels that match wal_sync_method values, and added more tests for open_sync with different sizes. This should make the program easier for novices to understand. Here is a test run for Ubuntu 11.04: $ ./test_fsync 2000 operations per test Compare file sync methods using one 8k write: (in wal_sync_method preference order, except fdatasync is Linux's default) open_datasync (non-direct I/O)* 85.127 ops/sec open_datasync (direct I/O) 87.119 ops/sec fdatasync 81.006 ops/sec fsync 82.621 ops/sec fsync_writethrough n/a open_sync (non-direct I/O)* 84.412 ops/sec open_sync (direct I/O) 91.006 ops/sec * This non-direct I/O mode is not used by Postgres. Compare file sync methods using two 8k writes: (in wal_sync_method preference order, except fdatasync is Linux's default) open_datasync (non-direct I/O)* 42.721 ops/sec open_datasync (direct I/O) 45.296 ops/sec fdatasync 76.665 ops/sec fsync 78.361 ops/sec fsync_writethrough n/a open_sync (non-direct I/O)* 42.311 ops/sec open_sync (direct I/O) 45.247 ops/sec * This non-direct I/O mode is not used by Postgres. Compare open_sync with different write sizes: (This is designed to compare the cost of writing 16k in different write open_sync sizes.) 1 16k open_sync write 86.740 ops/sec 2 8k open_sync writes 44.709 ops/sec 4 4k open_sync writes 22.096 ops/sec 8 2k open_sync writes 10.856 ops/sec 16 1k open_sync writes 5.434 ops/sec Test if fsync on non-write file descriptor is honored: (If the times are similar, fsync() can sync data written on a different descriptor.) write, fsync, close 86.802 ops/sec write, close, fsync 85.766 ops/sec Non-sync'ed 8k writes: write 83.068 ops/sec Applied patch attached. -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + It's impossible for everything to be true. 
+ diff --git a/src/tools/fsync/test_fsync.c b/src/tools/fsync/test_fsync.c index b1cec74..9c829ba 100644 *** /tmp/pgdiff.3331/yCQf2a_test_fsync.c Tue Jan 18 15:06:39 2011 --- src/tools/fsync/test_fsync.c Tue Jan 18 14:43:58 2011 *************** void test_open(void); *** 47,52 **** --- 47,53 ---- void test_non_sync(void); void test_sync(int writes_per_op); void test_open_syncs(void); + void test_open_sync(const char *msg, int writes_size); void test_file_descriptor_sync(void); void print_elapse(struct timeval start_t, struct timeval stop_t); void die(char *str); *************** main(int argc, char *argv[]) *** 61,68 **** test_open(); - test_non_sync(); - /* Test using 1 8k write */ test_sync(1); --- 62,67 ---- *************** main(int argc, char *argv[]) *** 73,78 **** --- 72,79 ---- test_file_descriptor_sync(); + test_non_sync(); + unlink(filename); return 0; *************** handle_args(int argc, char *argv[]) *** 105,111 **** } while ((option = getopt_long(argc, argv, "f:o:", ! long_options, &optindex)) != -1) { switch (option) { --- 106,112 ---- } while ((option = getopt_long(argc, argv, "f:o:", ! long_options, &optindex)) != -1) { switch (option) { *************** handle_args(int argc, char *argv[]) *** 126,132 **** } } ! printf("%d operations per test\n\n", ops_per_test); } void --- 127,133 ---- } } ! 
printf("%d operations per test\n", ops_per_test); } void *************** test_open(void) *** 162,201 **** } void - test_non_sync(void) - { - int tmpfile, ops; - - /* - * Test a simple write without fsync - */ - printf("Simple non-sync'ed write:\n"); - printf(LABEL_FORMAT, "8k write"); - fflush(stdout); - - gettimeofday(&start_t, NULL); - for (ops = 0; ops < ops_per_test; ops++) - { - if ((tmpfile = open(filename, O_RDWR, 0)) == -1) - die("Cannot open output file."); - if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) - die("write failed"); - close(tmpfile); - } - gettimeofday(&stop_t, NULL); - print_elapse(start_t, stop_t); - } - - void test_sync(int writes_per_op) { int tmpfile, ops, writes; bool fs_warning = false; if (writes_per_op == 1) ! printf("\nCompare file sync methods using one write:\n"); else ! printf("\nCompare file sync methods using two writes:\n"); printf("(in wal_sync_method preference order, except fdatasync\n"); printf("is Linux's default)\n"); --- 163,177 ---- } void test_sync(int writes_per_op) { int tmpfile, ops, writes; bool fs_warning = false; if (writes_per_op == 1) ! printf("\nCompare file sync methods using one 8k write:\n"); else ! printf("\nCompare file sync methods using two 8k writes:\n"); printf("(in wal_sync_method preference order, except fdatasync\n"); printf("is Linux's default)\n"); *************** test_sync(int writes_per_op) *** 203,218 **** * Test open_datasync if available */ #ifdef OPEN_DATASYNC_FLAG ! if (writes_per_op == 1) ! printf(LABEL_FORMAT, "open_datasync 8k write" ! #if PG_O_DIRECT != 0 ! "*" ! #endif ! ); ! else ! printf(LABEL_FORMAT, "2 open_datasync 8k writes" #if PG_O_DIRECT != 0 ! "*" #endif ); fflush(stdout); --- 179,187 ---- * Test open_datasync if available */ #ifdef OPEN_DATASYNC_FLAG ! printf(LABEL_FORMAT, "open_datasync" #if PG_O_DIRECT != 0 ! " (non-direct I/O)*" #endif ); fflush(stdout); *************** test_sync(int writes_per_op) *** 243,252 **** } else { ! if (writes_per_op == 1) ! 
printf(LABEL_FORMAT, "open_datasync 8k direct I/O write"); ! else ! printf(LABEL_FORMAT, "2 open_datasync 8k direct I/O writes"); fflush(stdout); gettimeofday(&start_t, NULL); --- 212,218 ---- } else { ! printf(LABEL_FORMAT, "open_datasync (direct I/O)"); fflush(stdout); gettimeofday(&start_t, NULL); *************** test_sync(int writes_per_op) *** 262,269 **** close(tmpfile); print_elapse(start_t, stop_t); } - #else - printf(NA_FORMAT, "o_direct", "n/a\n"); #endif #else --- 228,233 ---- *************** test_sync(int writes_per_op) *** 274,283 **** * Test fdatasync if available */ #ifdef HAVE_FDATASYNC ! if (writes_per_op == 1) ! printf(LABEL_FORMAT, "8k write, fdatasync"); ! else ! printf(LABEL_FORMAT, "8k write, 8k write, fdatasync"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) --- 238,244 ---- * Test fdatasync if available */ #ifdef HAVE_FDATASYNC ! printf(LABEL_FORMAT, "fdatasync"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) *************** test_sync(int writes_per_op) *** 302,311 **** /* * Test fsync */ ! if (writes_per_op == 1) ! printf(LABEL_FORMAT, "8k write, fsync"); ! else ! printf(LABEL_FORMAT, "8k write, 8k write, fsync"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) --- 263,269 ---- /* * Test fsync */ ! printf(LABEL_FORMAT, "fsync"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) *************** test_sync(int writes_per_op) *** 329,338 **** * If fsync_writethrough is available, test as well */ #ifdef HAVE_FSYNC_WRITETHROUGH ! if (writes_per_op == 1) ! printf(LABEL_FORMAT, "8k write, fsync_writethrough"); ! else ! printf(LABEL_FORMAT, "8k write, 8k write, fsync_writethrough"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) --- 287,293 ---- * If fsync_writethrough is available, test as well */ #ifdef HAVE_FSYNC_WRITETHROUGH ! 
printf(LABEL_FORMAT, "fsync_writethrough"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) *************** test_sync(int writes_per_op) *** 359,374 **** * Test open_sync if available */ #ifdef OPEN_SYNC_FLAG ! if (writes_per_op == 1) ! printf(LABEL_FORMAT, "open_sync 8k write" ! #if PG_O_DIRECT != 0 ! "*" ! #endif ! ); ! else ! printf(LABEL_FORMAT, "2 open_sync 8k writes" #if PG_O_DIRECT != 0 ! "*" #endif ); fflush(stdout); --- 314,322 ---- * Test open_sync if available */ #ifdef OPEN_SYNC_FLAG ! printf(LABEL_FORMAT, "open_sync" #if PG_O_DIRECT != 0 ! " (non-direct I/O)*" #endif ); fflush(stdout); *************** test_sync(int writes_per_op) *** 399,408 **** } else { ! if (writes_per_op == 1) ! printf(LABEL_FORMAT, "open_sync 8k direct I/O write"); ! else ! printf(LABEL_FORMAT, "2 open_sync 8k direct I/O writes"); fflush(stdout); gettimeofday(&start_t, NULL); --- 347,353 ---- } else { ! printf(LABEL_FORMAT, "open_sync (direct I/O)"); fflush(stdout); gettimeofday(&start_t, NULL); *************** test_sync(int writes_per_op) *** 418,425 **** close(tmpfile); print_elapse(start_t, stop_t); } - #else - printf(NA_FORMAT, "o_direct", "n/a\n"); #endif #else --- 363,368 ---- *************** test_sync(int writes_per_op) *** 428,434 **** #if defined(OPEN_DATASYNC_FLAG) || defined(OPEN_SYNC_FLAG) if (PG_O_DIRECT != 0) ! printf("* This non-direct I/O option is not used by Postgres.\n"); #endif if (fs_warning) --- 371,377 ---- #if defined(OPEN_DATASYNC_FLAG) || defined(OPEN_SYNC_FLAG) if (PG_O_DIRECT != 0) ! printf("* This non-direct I/O mode is not used by Postgres.\n"); #endif if (fs_warning) *************** test_sync(int writes_per_op) *** 441,454 **** void test_open_syncs(void) { ! int tmpfile, ops; ! /* ! * Compare 1 to 2 writes ! */ ! printf("\nCompare open_sync with different sizes:\n"); ! printf("(This is designed to compare the cost of one large\n"); ! 
printf("sync'ed write and two smaller sync'ed writes.)\n"); /* * Test open_sync with different size files --- 384,405 ---- void test_open_syncs(void) { ! printf("\nCompare open_sync with different write sizes:\n"); ! printf("(This is designed to compare the cost of writing 16k\n"); ! printf("in different write open_sync sizes.)\n"); ! test_open_sync(" 1 16k open_sync write", 16); ! test_open_sync(" 2 8k open_sync writes", 8); ! test_open_sync(" 4 4k open_sync writes", 4); ! test_open_sync(" 8 2k open_sync writes", 2); ! test_open_sync("16 1k open_sync writes", 1); ! } ! ! ! void ! test_open_sync(const char *msg, int writes_size) ! { ! int tmpfile, ops, writes; /* * Test open_sync with different size files *************** test_open_syncs(void) *** 458,471 **** printf(NA_FORMAT, "o_direct", "n/a**\n"); else { ! printf(LABEL_FORMAT, "open_sync 16k write"); fflush(stdout); gettimeofday(&start_t, NULL); for (ops = 0; ops < ops_per_test; ops++) { ! if (write(tmpfile, buf, WRITE_SIZE * 2) != WRITE_SIZE * 2) ! die("write failed"); if (lseek(tmpfile, 0, SEEK_SET) == -1) die("seek failed"); } --- 409,423 ---- printf(NA_FORMAT, "o_direct", "n/a**\n"); else { ! printf(LABEL_FORMAT, msg); fflush(stdout); gettimeofday(&start_t, NULL); for (ops = 0; ops < ops_per_test; ops++) { ! for (writes = 0; writes < 16 / writes_size; writes++) ! if (write(tmpfile, buf, writes_size) != writes_size) ! 
die("write failed"); if (lseek(tmpfile, 0, SEEK_SET) == -1) die("seek failed"); } *************** test_open_syncs(void) *** 474,500 **** print_elapse(start_t, stop_t); } - if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1) - printf(NA_FORMAT, "o_direct", "n/a**\n"); - else - { - printf(LABEL_FORMAT, "2 open_sync 8k writes"); - fflush(stdout); - - gettimeofday(&start_t, NULL); - for (ops = 0; ops < ops_per_test; ops++) - { - if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) - die("write failed"); - if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) - die("write failed"); - if (lseek(tmpfile, 0, SEEK_SET) == -1) - die("seek failed"); - } - gettimeofday(&stop_t, NULL); - close(tmpfile); - print_elapse(start_t, stop_t); - } #else printf(NA_FORMAT, "open_sync", "n/a\n"); #endif --- 426,431 ---- *************** test_file_descriptor_sync(void) *** 520,526 **** * first write, fsync and close, which is the * normal behavior without multiple descriptors */ ! printf(LABEL_FORMAT, "8k write, fsync, close"); fflush(stdout); gettimeofday(&start_t, NULL); --- 451,457 ---- * first write, fsync and close, which is the * normal behavior without multiple descriptors */ ! printf(LABEL_FORMAT, "write, fsync, close"); fflush(stdout); gettimeofday(&start_t, NULL); *************** test_file_descriptor_sync(void) *** 549,555 **** * This simulates processes fsyncing each other's * writes. */ ! printf(LABEL_FORMAT, "8k write, close, fsync"); fflush(stdout); gettimeofday(&start_t, NULL); --- 480,486 ---- * This simulates processes fsyncing each other's * writes. */ ! 
printf(LABEL_FORMAT, "write, close, fsync"); fflush(stdout); gettimeofday(&start_t, NULL); *************** test_file_descriptor_sync(void) *** 572,577 **** --- 503,533 ---- } + void + test_non_sync(void) + { + int tmpfile, ops; + + /* + * Test a simple write without fsync + */ + printf("\nNon-sync'ed 8k writes:\n"); + printf(LABEL_FORMAT, "write"); + fflush(stdout); + + gettimeofday(&start_t, NULL); + for (ops = 0; ops < ops_per_test; ops++) + { + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("Cannot open output file."); + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + close(tmpfile); + } + gettimeofday(&stop_t, NULL); + print_elapse(start_t, stop_t); + } + /* * print out the writes per second for tests */
On Jan 18, 2011, at 3:55 PM, Bruce Momjian wrote: > I have modified test_fsync to use test labels that match wal_sync_method > values, and and added more tests for open_sync with different sizes. > This should make the program easier for novices to understand. Here is > a test run for Ubuntu 11.04: > > $ ./test_fsync > 2000 operations per test > > Compare file sync methods using one 8k write: > (in wal_sync_method preference order, except fdatasync > is Linux's default) > open_datasync (non-direct I/O)* 85.127 ops/sec > open_datasync (direct I/O) 87.119 ops/sec > fdatasync 81.006 ops/sec > fsync 82.621 ops/sec > fsync_writethrough n/a > open_sync (non-direct I/O)* 84.412 ops/sec > open_sync (direct I/O) 91.006 ops/sec > * This non-direct I/O mode is not used by Postgres. I am curious how this is targeted at novices. A naive user might enable the "fastest" option which could be exactly wrong. For this to be useful to novices, I suspect the tool will need to generate platform-specific suggestions, no? Cheers, M
A.M. wrote: > > On Jan 18, 2011, at 3:55 PM, Bruce Momjian wrote: > > > I have modified test_fsync to use test labels that match wal_sync_method > > values, and and added more tests for open_sync with different sizes. > > This should make the program easier for novices to understand. Here is > > a test run for Ubuntu 11.04: > > > > $ ./test_fsync > > 2000 operations per test > > > > Compare file sync methods using one 8k write: > > (in wal_sync_method preference order, except fdatasync > > is Linux's default) > > open_datasync (non-direct I/O)* 85.127 ops/sec > > open_datasync (direct I/O) 87.119 ops/sec > > fdatasync 81.006 ops/sec > > fsync 82.621 ops/sec > > fsync_writethrough n/a > > open_sync (non-direct I/O)* 84.412 ops/sec > > open_sync (direct I/O) 91.006 ops/sec > > * This non-direct I/O mode is not used by Postgres. > > I am curious how this is targeted at novices. A naive user might enable > the "fastest" option which could be exactly wrong. For this to be useful > to novices, I suspect the tool will need to generate platform-specific > suggestions, no? Uh, why isn't the fastest option right for them? It is hardware/kernel specific when you run it --- how could it be better? -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + It's impossible for everything to be true. +
On Jan 18, 2011, at 5:16 PM, Bruce Momjian wrote: > A.M. wrote: >> >> On Jan 18, 2011, at 3:55 PM, Bruce Momjian wrote: >> >>> I have modified test_fsync to use test labels that match wal_sync_method >>> values, and and added more tests for open_sync with different sizes. >>> This should make the program easier for novices to understand. Here is >>> a test run for Ubuntu 11.04: >>> >>> $ ./test_fsync >>> 2000 operations per test >>> >>> Compare file sync methods using one 8k write: >>> (in wal_sync_method preference order, except fdatasync >>> is Linux's default) >>> open_datasync (non-direct I/O)* 85.127 ops/sec >>> open_datasync (direct I/O) 87.119 ops/sec >>> fdatasync 81.006 ops/sec >>> fsync 82.621 ops/sec >>> fsync_writethrough n/a >>> open_sync (non-direct I/O)* 84.412 ops/sec >>> open_sync (direct I/O) 91.006 ops/sec >>> * This non-direct I/O mode is not used by Postgres. >> >> I am curious how this is targeted at novices. A naive user might enable >> the "fastest" option which could be exactly wrong. For this to be useful >> to novices, I suspect the tool will need to generate platform-specific >> suggestions, no? > > Uh, why isn't the fastest option right for them? It is hardware/kernel > specific when you run it --- how could it be better? Because the fastest option may not be syncing to disk. For example, the only option that makes sense on OS X is fsync_writethrough-it would be helpful if the tool pointed that out (on OS X only, obviously). Cheers, M
A.M. wrote: > > On Jan 18, 2011, at 5:16 PM, Bruce Momjian wrote: > > > A.M. wrote: > >> > >> On Jan 18, 2011, at 3:55 PM, Bruce Momjian wrote: > >> > >>> I have modified test_fsync to use test labels that match wal_sync_method > >>> values, and and added more tests for open_sync with different sizes. > >>> This should make the program easier for novices to understand. Here is > >>> a test run for Ubuntu 11.04: > >>> > >>> $ ./test_fsync > >>> 2000 operations per test > >>> > >>> Compare file sync methods using one 8k write: > >>> (in wal_sync_method preference order, except fdatasync > >>> is Linux's default) > >>> open_datasync (non-direct I/O)* 85.127 ops/sec > >>> open_datasync (direct I/O) 87.119 ops/sec > >>> fdatasync 81.006 ops/sec > >>> fsync 82.621 ops/sec > >>> fsync_writethrough n/a > >>> open_sync (non-direct I/O)* 84.412 ops/sec > >>> open_sync (direct I/O) 91.006 ops/sec > >>> * This non-direct I/O mode is not used by Postgres. > >> > >> I am curious how this is targeted at novices. A naive user might enable > >> the "fastest" option which could be exactly wrong. For this to be useful > >> to novices, I suspect the tool will need to generate platform-specific > >> suggestions, no? > > > > Uh, why isn't the fastest option right for them? It is hardware/kernel > > specific when you run it --- how could it be better? > > Because the fastest option may not be syncing to disk. For example, > the only option that makes sense on OS X is fsync_writethrough- it > would be helpful if the tool pointed that out (on OS X only, obviously). Yes, that would be a serious problem. :-( I am not sure how we would address this --- your point is a good one. -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + It's impossible for everything to be true. +
On Jan 18, 2011, at 5:21 PM, Bruce Momjian wrote: > A.M. wrote: >> >> On Jan 18, 2011, at 5:16 PM, Bruce Momjian wrote: >> >>> A.M. wrote: >>>> >>>> On Jan 18, 2011, at 3:55 PM, Bruce Momjian wrote: >>>> >>>>> I have modified test_fsync to use test labels that match wal_sync_method >>>>> values, and and added more tests for open_sync with different sizes. >>>>> This should make the program easier for novices to understand. Here is >>>>> a test run for Ubuntu 11.04: >>>>> >>>>> $ ./test_fsync >>>>> 2000 operations per test >>>>> >>>>> Compare file sync methods using one 8k write: >>>>> (in wal_sync_method preference order, except fdatasync >>>>> is Linux's default) >>>>> open_datasync (non-direct I/O)* 85.127 ops/sec >>>>> open_datasync (direct I/O) 87.119 ops/sec >>>>> fdatasync 81.006 ops/sec >>>>> fsync 82.621 ops/sec >>>>> fsync_writethrough n/a >>>>> open_sync (non-direct I/O)* 84.412 ops/sec >>>>> open_sync (direct I/O) 91.006 ops/sec >>>>> * This non-direct I/O mode is not used by Postgres. >>>> >>>> I am curious how this is targeted at novices. A naive user might enable >>>> the "fastest" option which could be exactly wrong. For this to be useful >>>> to novices, I suspect the tool will need to generate platform-specific >>>> suggestions, no? >>> >>> Uh, why isn't the fastest option right for them? It is hardware/kernel >>> specific when you run it --- how could it be better? >> >> Because the fastest option may not be syncing to disk. For example, >> the only option that makes sense on OS X is fsync_writethrough- it >> would be helpful if the tool pointed that out (on OS X only, obviously). > > Yes, that would be a serious problem. :-( > > I am not sure how we would address this --- your point is a good one. One general idea I had would be to offer some heuristics such as "this sync rate is comparable to that of one SATA drive" or "comparable to RAID 10 with X drives" or "this rate is likely too fast to actually be syncing". 
But then you are stuck with making sure that the heuristics are kept up-to-date, which would be annoying. Otherwise, the only option I see is to detect the kernel and compare against a list of known problematic methods. Perhaps it would be easier to compare against a whitelist. Also, the tool would likely need to parse "mount" output to account for problems with specific filesystems. I am just throwing around some ideas... Cheers, M
A.M. wrote: > >> Because the fastest option may not be syncing to disk. For example, > >> the only option that makes sense on OS X is fsync_writethrough- it > >> would be helpful if the tool pointed that out (on OS X only, obviously). > > > > Yes, that would be a serious problem. :-( > > > > I am not sure how we would address this --- your point is a good one. > > One general idea I had would be to offer some heuristics such as "this > sync rate is comparable to that of one SATA drive" or "comparable to > RAID 10 with X drives" or "this rate is likely too fast to be actually > be syncing". But then you are stuck with making sure that the heuristics > are kept up-to-date, which would be annoying. That fails for RAID BBUs. > Otherwise, the only option I see is to detect the kernel and compare > against a list of known problematic methods. Perhaps it would be easier > to compare against a whitelist. Also, the tool would likely need to > parse "mount" output to account for problems with specific filesystems. > > I am just throwing around some ideas... That sounds pretty complicated. One idea would be the creation of a wiki where people could post their results, or ideally a tool that could read the output and load it into a database for analysis with other results. -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + It's impossible for everything to be true. +
Bruce Momjian <bruce@momjian.us> writes: > I have modified test_fsync to use test labels that match wal_sync_method > values, and and added more tests for open_sync with different sizes. Given that it was unclear whether the first such test was of any value, why are you slowing down the program by adding more? regards, tom lane
Tom Lane wrote: > Bruce Momjian <bruce@momjian.us> writes: > > I have modified test_fsync to use test labels that match wal_sync_method > > values, and and added more tests for open_sync with different sizes. > > Given that it was unclear whether the first such test was of any value, > why are you slowing down the program by adding more? Greg Smith indicated it has value, so I made it more complete. No? -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + It's impossible for everything to be true. +
Bruce Momjian <bruce@momjian.us> writes: > Tom Lane wrote: >> Bruce Momjian <bruce@momjian.us> writes: >>> I have modified test_fsync to use test labels that match wal_sync_method >>> values, and and added more tests for open_sync with different sizes. >> Given that it was unclear whether the first such test was of any value, >> why are you slowing down the program by adding more? > Greg Smith indicated it has value, so I made it more complete. No? My recollection of that discussion is a bit different: there wasn't a clear-cut reason to rip it out. But the more tests you add to test_fsync, the less useful it becomes. regards, tom lane
Tom Lane wrote: > Bruce Momjian <bruce@momjian.us> writes: > > Tom Lane wrote: > >> Bruce Momjian <bruce@momjian.us> writes: > >>> I have modified test_fsync to use test labels that match wal_sync_method > >>> values, and and added more tests for open_sync with different sizes. > > >> Given that it was unclear whether the first such test was of any value, > >> why are you slowing down the program by adding more? > > > Greg Smith indicated it has value, so I made it more complete. No? > > My recollection of that discussion is a bit different: there wasn't a > clear-cut reason to rip it out. But the more tests you add to > test_fsync, the less useful it becomes. Well, this is Greg Smith's text: http://archives.postgresql.org/pgsql-hackers/2011-01/msg01717.php > Might be some value for determining things like what the optimal WAL > block size to use is. All these tests are kind of hard to use > effectively still, I'm not sure if it's time to start trimming tests yet > until we've made more progress on interpreting results first. so I figured the test should be complete; a partial test is pretty useless. What I am thinking is that the program should just run the first test by default (to choose wal_sync_method), and add a -v option to run the additional tests. Yes? -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + It's impossible for everything to be true. +
On Jan 18, 2011, at 5:41 PM, Bruce Momjian wrote: > A.M. wrote: >>>> Because the fastest option may not be syncing to disk. For example, >>>> the only option that makes sense on OS X is fsync_writethrough- it >>>> would be helpful if the tool pointed that out (on OS X only, obviously). >>> >>> Yes, that would be a serious problem. :-( >>> >>> I am not sure how we would address this --- your point is a good one. >> >> One general idea I had would be to offer some heuristics such as "this >> sync rate is comparable to that of one SATA drive" or "comparable to >> RAID 10 with X drives" or "this rate is likely too fast to be actually >> be syncing". But then you are stuck with making sure that the heuristics >> are kept up-to-date, which would be annoying. > > That fails for RAID BBUs. Well, it's nothing more than a heuristic- it is still nice to know whether or not the fancy hardware RAID I just set up is similar to Josh Berkus' RAID setup or a single SATA drive (which would hint at a misconfiguration). As you said, perhaps a wiki is better for this. But a wiki won't integrate with this tool, which is why I would hesitate to point novices to this tool... should the tool point to the wiki? > >> Otherwise, the only option I see is to detect the kernel and compare >> against a list of known problematic methods. Perhaps it would be easier >> to compare against a whitelist. Also, the tool would likely need to >> parse "mount" output to account for problems with specific filesystems. >> >> I am just throwing around some ideas... > > That sounds pretty complicated. One idea would be the creation of a > wiki where people could post their results, or ideally a tool that could > read the output and load it into a database for analysis with other > results. The OS X example is pretty cut-and-dry- it would be nice if there were some kind of hints in the tool pointing in the right direction, or at least a few words of warning: "the fastest option may not be the safest- read the docs". 
Cheers, M
A.M. wrote: > > On Jan 18, 2011, at 5:41 PM, Bruce Momjian wrote: > > > A.M. wrote: > >>>> Because the fastest option may not be syncing to disk. For example, > >>>> the only option that makes sense on OS X is fsync_writethrough- it > >>>> would be helpful if the tool pointed that out (on OS X only, obviously). > >>> > >>> Yes, that would be a serious problem. :-( > >>> > >>> I am not sure how we would address this --- your point is a good one. > >> > >> One general idea I had would be to offer some heuristics such as "this > >> sync rate is comparable to that of one SATA drive" or "comparable to > >> RAID 10 with X drives" or "this rate is likely too fast to be actually > >> be syncing". But then you are stuck with making sure that the heuristics > >> are kept up-to-date, which would be annoying. > > > > That fails for RAID BBUs. > > Well, it's nothing more than a heuristic- it is still nice to know whether or not the fancy hardware RAID I just set up is similar to Josh Berkus' RAID setup or a single SATA drive (which would hint at a misconfiguration). As you said, perhaps a wiki is better for this. But a wiki won't integrate with this tool, which is why I would hesitate to point novices to this tool... should the tool point to the wiki? > > > > >> Otherwise, the only option I see is to detect the kernel and compare > >> against a list of known problematic methods. Perhaps it would be easier > >> to compare against a whitelist. Also, the tool would likely need to > >> parse "mount" output to account for problems with specific filesystems. > >> > >> I am just throwing around some ideas... > > > > That sounds pretty complicated. One idea would be the creation of a > > wiki where people could post their results, or ideally a tool that could > > read the output and load it into a database for analysis with other > > results. 
> > The OS X example is pretty cut-and-dry- it would be nice if there were > some kind of hints in the tool pointing in the right direction, or at > least a few words of warning: "the fastest option may not be the safest- > read the docs". We have a wal reliability section in the docs that attempts to address this: http://developer.postgresql.org/pgdocs/postgres/wal-reliability.html -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + It's impossible for everything to be true. +