Re: Emacs vs pg_indent's weird indentation for function declarations - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Emacs vs pg_indent's weird indentation for function declarations
Date
Msg-id 14259.1557895390@sss.pgh.pa.us
Whole thread Raw
In response to Re: Emacs vs pg_indent's weird indentation for function declarations  (Thomas Munro <thomas.munro@gmail.com>)
Responses Re: Emacs vs pg_indent's weird indentation for function declarations
List pgsql-hackers
Thomas Munro <thomas.munro@gmail.com> writes:
> I tried teaching pgindent's post_indent subroutine to unmangle the
> multi-line declarations it mangles.  That produces correct
> indentation!  But can also produce lines that exceed the column limit
> we would normally wrap at (of course, because pg_bsd_indent had less
> whitespace on the left when it made wrapping decisions).  Doh.
> Attached for posterity, but it's useless.

> So I think pg_bsd_indent itself needs to be fixed.  I think I know
> where the problem is.  lexi.c isn't looking far ahead enough to
> recognise multi-line function declarations:

I experimented with fixing this.  I was able to get pg_bsd_indent to
distinguish multi-line function declarations from definitions, but it
turns out that it doesn't help your concern about the lines being too
long after re-indenting.  Contrary to what you imagine above, it seems
pg_bsd_indent will not reflow argument lists, regardless of whether it
thinks there needs to be more or less leading whitespace.  I'm a bit
surprised that -bc doesn't cause that to happen, but it doesn't (and I'm
not sure we'd really want to force one-parameter-per-line, anyway).

Anyway, the attached hasty-and-undercommented change to pg_bsd_indent
allows removal of the "Move prototype names to the same line as return
type" hack in pgindent, and we then get prototypes with properly
lined-up arguments, but we'll have a lot of places with over-length
lines needing manual fixing.  Unless somebody wants to find where to
fix that in pg_bsd_indent, but I've had my fill of looking at that
spaghetti code for today.

            regards, tom lane

diff --git a/indent.h b/indent.h
index 0fffd89..1708dbc 100644
--- a/indent.h
+++ b/indent.h
@@ -41,6 +41,8 @@ void    diag2(int, const char *);
 void    diag3(int, const char *, int);
 void    diag4(int, const char *, int, int);
 void    dump_line(void);
+int    lookahead(void);
+void    lookahead_reset(void);
 void    fill_buffer(void);
 void    parse(int);
 void    pr_comment(void);
diff --git a/io.c b/io.c
index df11094..8d13a52 100644
--- a/io.c
+++ b/io.c
@@ -51,6 +51,13 @@ static char sccsid[] = "@(#)io.c    8.1 (Berkeley) 6/6/93";
 
 int         comment_open;
 static int  paren_target;
+
+static char *lookahead_buf;    /* malloc'd buffer, or NULL initially */
+static char *lookahead_buf_end; /* end+1 of allocated space */
+static char *lookahead_start;    /* => next char for fill_buffer() to fetch */
+static char *lookahead_ptr;    /* => next char for lookahead() to fetch */
+static char *lookahead_end;    /* last+1 valid char in lookahead_buf */
+
 static int pad_output(int current, int target);
 
 void
@@ -252,6 +259,58 @@ compute_label_target(void)
     : ps.ind_size * (ps.ind_level - label_offset) + 1;
 }
 
+/*
+ * Read data ahead of what has been collected into in_buffer.
+ *
+ * Successive calls get further and further ahead, until we hit EOF.
+ * Call lookahead_reset to rescan from just beyond in_buffer.
+ */
+int
+lookahead(void)
+{
+    while (lookahead_ptr >= lookahead_end) {
+      int i = getc(input);
+
+      if (i == EOF)
+    return i;
+      if (i == '\0')
+    continue;        /* fill_buffer drops nulls, so do we */
+
+      if (lookahead_end >= lookahead_buf_end) {
+    /* Need to allocate or enlarge lookahead_buf */
+    char *new_buf;
+    size_t req;
+
+    if (lookahead_buf == NULL) {
+      req = 64;
+      new_buf = malloc(req);
+    } else {
+      req = (lookahead_buf_end - lookahead_buf) * 2;
+      new_buf = realloc(lookahead_buf, req);
+    }
+    if (new_buf == NULL)
+      errx(1, "too much lookahead required");
+    lookahead_start = new_buf + (lookahead_start - lookahead_buf);
+    lookahead_ptr = new_buf + (lookahead_ptr - lookahead_buf);
+    lookahead_end = new_buf + (lookahead_end - lookahead_buf);
+    lookahead_buf = new_buf;
+    lookahead_buf_end = new_buf + req;
+      }
+
+      *lookahead_end++ = i;
+    }
+    return (unsigned char) *lookahead_ptr++;
+}
+
+/*
+ * Reset so that lookahead() will again scan from just beyond what's in
+ * in_buffer.
+ */
+void
+lookahead_reset(void)
+{
+    lookahead_ptr = lookahead_start;
+}
 
 /*
  * Copyright (C) 1976 by the Board of Trustees of the University of Illinois
@@ -293,11 +352,16 @@ fill_buffer(void)
         p = in_buffer + offset;
         in_buffer_limit = in_buffer + size - 2;
     }
-    if ((i = getc(f)) == EOF) {
-        *p++ = ' ';
-        *p++ = '\n';
-        had_eof = true;
-        break;
+    if (lookahead_start < lookahead_end) {
+      i = (unsigned char) *lookahead_start++;
+    } else {
+      lookahead_start = lookahead_ptr = lookahead_end = lookahead_buf;
+      if ((i = getc(f)) == EOF) {
+        *p++ = ' ';
+        *p++ = '\n';
+        had_eof = true;
+        break;
+      }
     }
     if (i != '\0')
         *p++ = i;
diff --git a/lexi.c b/lexi.c
index 3c7bfef..e637e1a 100644
--- a/lexi.c
+++ b/lexi.c
@@ -148,6 +148,39 @@ strcmp_type(const void *e1, const void *e2)
     return (strcmp(e1, *(const char * const *)e2));
 }
 
+/*
+ * Scan over a function argument declaration list, then see if it is
+ * followed by ';' or ',' indicating that it's just a prototype.
+ *
+ * We do not detect comments, so you can fool this by putting unbalanced
+ * parens inside a comment within the argument list.  So don't do that.
+ */
+static int
+is_prototype(char *tp)
+{
+  int paren_depth = 0;
+
+  lookahead_reset();
+  for (;;) {
+    int c;
+
+    if (tp < buf_end)
+      c = *tp++;
+    else {
+    c = lookahead();
+    if (c == EOF)
+      break;
+    }
+    if (c == '(')
+      paren_depth++;
+    else if (c == ')')
+      paren_depth--;
+    else if (paren_depth == 0 && !isspace((unsigned char) c))
+      return (c == ';' || c == ',');
+  }
+  return false;
+}
+
 int
 lexi(struct parser_state *state)
 {
@@ -348,15 +381,12 @@ lexi(struct parser_state *state)
     }            /* end of if (found_it) */
     if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
         state->in_parameter_declaration == 0 && state->block_init == 0) {
-        char *tp = buf_ptr;
-        while (tp < buf_end)
-        if (*tp++ == ')' && (*tp == ';' || *tp == ','))
-            goto not_proc;
+      if (!is_prototype(buf_ptr)) {
         strncpy(state->procname, token, sizeof state->procname - 1);
         if (state->in_decl)
         state->in_parameter_declaration = 1;
         return (funcname);
-    not_proc:;
+      }
     }
     /*
      * The following hack attempts to guess whether or not the current

pgsql-hackers by date:

Previous
From: Michael Paquier
Date:
Subject: wal_consistency_checking clean on HEAD (f4125278)
Next
From: David Fetter
Date:
Subject: Re: New EXPLAIN option: ALL