Re: Emacs vs pg_indent's weird indentation for function declarations - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Emacs vs pg_indent's weird indentation for function declarations
Date
Msg-id 7214.1558295282@sss.pgh.pa.us
Whole thread Raw
In response to Re: Emacs vs pg_indent's weird indentation for function declarations  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
I wrote:
> Piotr Stefaniak <postgres@piotr-stefaniak.me> writes:
>> Sorry, but GNU indent already uses -kr for something else and I would 
>> like FreeBSD indent have something like that under the same name. 
>> Besides, indent has too many options and this one doesn't look like 
>> particularly desired by anyone. It's possible that someone will complain 
>> some day, but I don't think we should assume that they'll do or that 
>> they're more important than the other users who benefit from your change 
>> being the default behavior and no additional options.

> Huh.  OK, I'll rip the switch back out again.

Here's a proposed patch for you.

            regards, tom lane

diff --git a/args.c b/args.c
index f79de75..1850542 100644
--- a/args.c
+++ b/args.c
@@ -55,7 +55,7 @@ static char sccsid[] = "@(#)args.c    8.1 (Berkeley) 6/6/93";
 #include "indent_globs.h"
 #include "indent.h"

-#define INDENT_VERSION    "2.0"
+#define INDENT_VERSION    "2.1"

 /* profile types */
 #define    PRO_SPECIAL    1    /* special case */
diff --git a/indent.h b/indent.h
index 0fffd89..1708dbc 100644
--- a/indent.h
+++ b/indent.h
@@ -41,6 +41,8 @@ void    diag2(int, const char *);
 void    diag3(int, const char *, int);
 void    diag4(int, const char *, int, int);
 void    dump_line(void);
+int    lookahead(void);
+void    lookahead_reset(void);
 void    fill_buffer(void);
 void    parse(int);
 void    pr_comment(void);
diff --git a/io.c b/io.c
index df11094..fbaa5dd 100644
--- a/io.c
+++ b/io.c
@@ -51,6 +51,14 @@ static char sccsid[] = "@(#)io.c    8.1 (Berkeley) 6/6/93";

 int         comment_open;
 static int  paren_target;
+
+static char *lookahead_buf;    /* malloc'd buffer, or NULL initially */
+static char *lookahead_buf_end;    /* end+1 of allocated space */
+static char *lookahead_start;    /* => next char for fill_buffer() to fetch */
+static char *lookahead_ptr;    /* => next char for lookahead() to fetch */
+static char *lookahead_end;    /* last+1 valid char in lookahead_buf */
+static char *lookahead_bp_save;    /* lookahead position in bp_save, if any */
+
 static int pad_output(int current, int target);

 void
@@ -252,6 +260,73 @@ compute_label_target(void)
     : ps.ind_size * (ps.ind_level - label_offset) + 1;
 }

+/*
+ * Read data ahead of what has been collected into in_buffer.
+ *
+ * Successive calls get further and further ahead, until we hit EOF.
+ * Call lookahead_reset() to rescan from just beyond in_buffer.
+ *
+ * Lookahead is automatically reset whenever fill_buffer() reads beyond
+ * the lookahead buffer, i.e., you can't use this for "look behind".
+ *
+ * The standard pattern for potentially multi-line lookahead is to call
+ * lookahead_reset(), then enter a loop that scans forward from buf_ptr
+ * to buf_end, then (if necessary) calls lookahead() to read additional
+ * characters from beyond the end of the current line.
+ */
+int
+lookahead(void)
+{
+    /* First read whatever's in bp_save area */
+    if (lookahead_bp_save != NULL && lookahead_bp_save < be_save)
+    return (unsigned char) *lookahead_bp_save++;
+    /* Else, we have to examine and probably fill the main lookahead buffer */
+    while (lookahead_ptr >= lookahead_end) {
+    int        i = getc(input);
+
+    if (i == EOF)
+        return i;
+    if (i == '\0')
+        continue;        /* fill_buffer drops nulls, and so do we */
+
+    if (lookahead_end >= lookahead_buf_end) {
+        /* Need to allocate or enlarge lookahead_buf */
+        char       *new_buf;
+        size_t    req;
+
+        if (lookahead_buf == NULL) {
+        req = 64;
+        new_buf = malloc(req);
+        } else {
+        req = (lookahead_buf_end - lookahead_buf) * 2;
+        new_buf = realloc(lookahead_buf, req);
+        }
+        if (new_buf == NULL)
+        errx(1, "too much lookahead required");
+        lookahead_start = new_buf + (lookahead_start - lookahead_buf);
+        lookahead_ptr = new_buf + (lookahead_ptr - lookahead_buf);
+        lookahead_end = new_buf + (lookahead_end - lookahead_buf);
+        lookahead_buf = new_buf;
+        lookahead_buf_end = new_buf + req;
+    }
+
+    *lookahead_end++ = i;
+    }
+    return (unsigned char) *lookahead_ptr++;
+}
+
+/*
+ * Reset so that lookahead() will again scan from just beyond what's in
+ * in_buffer.
+ */
+void
+lookahead_reset(void)
+{
+    /* Reset the main lookahead buffer */
+    lookahead_ptr = lookahead_start;
+    /* If bp_save isn't NULL, we need to scan that first */
+    lookahead_bp_save = bp_save;
+}

 /*
  * Copyright (C) 1976 by the Board of Trustees of the University of Illinois
@@ -261,7 +336,9 @@ compute_label_target(void)
  *
  * NAME: fill_buffer
  *
- * FUNCTION: Reads one block of input into input_buffer
+ * FUNCTION: Reads one line of input into in_buffer,
+ * sets up buf_ptr and buf_end to point to the line's start and end+1.
+ * (Note that the buffer does not get null-terminated.)
  *
  * HISTORY: initial coding     November 1976    D A Willcox of CAC 1/7/77 A
  * Willcox of CAC    Added check for switch back to partly full input
@@ -279,6 +356,7 @@ fill_buffer(void)
     buf_ptr = bp_save;    /* do not read anything, just switch buffers */
     buf_end = be_save;
     bp_save = be_save = NULL;
+    lookahead_bp_save = NULL;
     if (buf_ptr < buf_end)
         return;        /* only return if there is really something in
                  * this buffer */
@@ -293,16 +371,21 @@ fill_buffer(void)
         p = in_buffer + offset;
         in_buffer_limit = in_buffer + size - 2;
     }
-    if ((i = getc(f)) == EOF) {
+    if (lookahead_start < lookahead_end) {
+        i = (unsigned char) *lookahead_start++;
+    } else {
+        lookahead_start = lookahead_ptr = lookahead_end = lookahead_buf;
+        if ((i = getc(f)) == EOF) {
         *p++ = ' ';
         *p++ = '\n';
         had_eof = true;
         break;
+        }
     }
     if (i != '\0')
         *p++ = i;
     if (i == '\n')
-        break;
+        break;
     }
     buf_ptr = in_buffer;
     buf_end = p;
diff --git a/lexi.c b/lexi.c
index 3c7bfef..d43723c 100644
--- a/lexi.c
+++ b/lexi.c
@@ -148,6 +148,74 @@ strcmp_type(const void *e1, const void *e2)
     return (strcmp(e1, *(const char * const *)e2));
 }

+/*
+ * Decide whether "foo(..." is a function definition or declaration.
+ *
+ * At call, we are looking at the '('.  Look ahead to find the first
+ * '{', ';' or ',' that is not within parentheses or comments; then
+ * it's a definition if we found '{', otherwise a declaration.
+ * Note that this rule is fooled by K&R-style parameter declarations,
+ * but telling the difference between those and function attributes
+ * seems like more trouble than it's worth.  This code could also be
+ * fooled by mismatched parens or apparent comment starts within string
+ * literals, but that seems unlikely in the context it's used in.
+ */
+static int
+is_func_definition(char *tp)
+{
+    int        paren_depth = 0;
+    int        in_comment = false;
+    int        in_slash_comment = false;
+    int        lastc = 0;
+
+    /* We may need to look past the end of the current buffer. */
+    lookahead_reset();
+    for (;;) {
+    int        c;
+
+    /* Fetch next character. */
+    if (tp < buf_end)
+        c = *tp++;
+    else {
+        c = lookahead();
+        if (c == EOF)
+        break;
+    }
+    /* Handle comments. */
+    if (in_comment) {
+        if (lastc == '*' && c == '/')
+        in_comment = false;
+    } else if (lastc == '/' && c == '*' && !in_slash_comment)
+        in_comment = true;
+    else if (in_slash_comment) {
+        if (c == '\n')
+        in_slash_comment = false;
+    } else if (lastc == '/' && c == '/')
+        in_slash_comment = true;
+    /* Count nested parens properly. */
+    else if (c == '(')
+        paren_depth++;
+    else if (c == ')') {
+        paren_depth--;
+        /*
+         * If we find unbalanced parens, we must have started inside a
+         * declaration.
+         */
+        if (paren_depth < 0)
+        return false;
+    } else if (paren_depth == 0) {
+        /* We are outside any parentheses or comments. */
+        if (c == '{')
+        return true;
+        else if (c == ';' || c == ',')
+        return false;
+    }
+    lastc = c;
+    }
+    /* Hit EOF --- for lack of anything better, assume "not a definition". */
+    return false;
+}
+
 int
 lexi(struct parser_state *state)
 {
@@ -348,15 +416,12 @@ lexi(struct parser_state *state)
     }            /* end of if (found_it) */
     if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
         state->in_parameter_declaration == 0 && state->block_init == 0) {
-        char *tp = buf_ptr;
-        while (tp < buf_end)
-        if (*tp++ == ')' && (*tp == ';' || *tp == ','))
-            goto not_proc;
-        strncpy(state->procname, token, sizeof state->procname - 1);
-        if (state->in_decl)
-        state->in_parameter_declaration = 1;
-        return (funcname);
-    not_proc:;
+        if (is_func_definition(buf_ptr)) {
+        strncpy(state->procname, token, sizeof state->procname - 1);
+        if (state->in_decl)
+            state->in_parameter_declaration = 1;
+        return (funcname);
+        }
     }
     /*
      * The following hack attempts to guess whether or not the current
diff --git a/tests/declarations.0 b/tests/declarations.0
index 6d668b1..8419494 100644
--- a/tests/declarations.0
+++ b/tests/declarations.0
@@ -70,10 +70,10 @@ static int ald_shutingdown = 0;
 struct thread *ald_thread;

 static int
-do_execve(td, args, mac_p)
-    struct thread *td;
-    struct image_args *args;
-    struct mac *mac_p;
+do_execve(
+struct thread *td,
+struct image_args *args,
+struct mac *mac_p)
 {

 }
diff --git a/tests/declarations.0.stdout b/tests/declarations.0.stdout
index e97e447..ab5a447 100644
--- a/tests/declarations.0.stdout
+++ b/tests/declarations.0.stdout
@@ -64,10 +64,10 @@ static int    ald_shutingdown = 0;
 struct thread  *ald_thread;

 static int
-do_execve(td, args, mac_p)
-    struct thread  *td;
-    struct image_args *args;
-    struct mac     *mac_p;
+do_execve(
+      struct thread *td,
+      struct image_args *args,
+      struct mac *mac_p)
 {

 }
diff --git a/tests/list_head.0 b/tests/list_head.0
index 3a186ca..35874eb 100644
--- a/tests/list_head.0
+++ b/tests/list_head.0
@@ -1,10 +1,9 @@
 /* $FreeBSD$ */
 /* See r309380 */
 static int
-do_execve(td, args, mac_p)
-    struct thread *td;
-    struct image_args *args;
-    struct mac *mac_p;
+do_execve(struct thread *td,
+struct image_args *args,
+struct mac *mac_p)
 {

 }
diff --git a/tests/list_head.0.stdout b/tests/list_head.0.stdout
index b6f0762..2ebcca5 100644
--- a/tests/list_head.0.stdout
+++ b/tests/list_head.0.stdout
@@ -1,10 +1,9 @@
 /* $FreeBSD$ */
 /* See r309380 */
 static int
-do_execve(td, args, mac_p)
-    struct thread  *td;
-    struct image_args *args;
-    struct mac     *mac_p;
+do_execve(struct thread *td,
+      struct image_args *args,
+      struct mac *mac_p)
 {

 }

pgsql-hackers by date:

Previous
From: Andres Freund
Date:
Subject: Re: Statistical aggregate functions are not working with PARTIALaggregation
Next
From: Julien Riou
Date:
Subject: Re: PROXY protocol support