I wrote:
> Robert Haas <robertmhaas@gmail.com> writes:
>> Yeah, maybe you're right. But I'd still prefer to see us break the
>> ABI and do this just in 9.0 rather than changing 8.4.
> OK, I can live with that. I'll take a look at it shortly.
Proposed patch attached (compiles, untested as yet).
regards, tom lane
Index: src/backend/commands/analyze.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/commands/analyze.c,v
retrieving revision 1.152
diff -c -r1.152 analyze.c
*** src/backend/commands/analyze.c 26 Feb 2010 02:00:37 -0000 1.152
--- src/backend/commands/analyze.c 1 Aug 2010 19:56:12 -0000
***************
*** 92,98 ****
AnlIndexData *indexdata, int nindexes,
HeapTuple *rows, int numrows,
MemoryContext col_context);
! static VacAttrStats *examine_attribute(Relation onerel, int attnum);
static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
int targrows, double *totalrows, double *totaldeadrows);
static double random_fract(void);
--- 92,99 ----
AnlIndexData *indexdata, int nindexes,
HeapTuple *rows, int numrows,
MemoryContext col_context);
! static VacAttrStats *examine_attribute(Relation onerel, int attnum,
! Node *index_expr);
static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
int targrows, double *totalrows, double *totaldeadrows);
static double random_fract(void);
***************
*** 339,345 ****
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column \"%s\" of relation \"%s\" does not exist",
col, RelationGetRelationName(onerel))));
! vacattrstats[tcnt] = examine_attribute(onerel, i);
if (vacattrstats[tcnt] != NULL)
tcnt++;
}
--- 340,346 ----
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column \"%s\" of relation \"%s\" does not exist",
col, RelationGetRelationName(onerel))));
! vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
if (vacattrstats[tcnt] != NULL)
tcnt++;
}
***************
*** 353,359 ****
tcnt = 0;
for (i = 1; i <= attr_cnt; i++)
{
! vacattrstats[tcnt] = examine_attribute(onerel, i);
if (vacattrstats[tcnt] != NULL)
tcnt++;
}
--- 354,360 ----
tcnt = 0;
for (i = 1; i <= attr_cnt; i++)
{
! vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
if (vacattrstats[tcnt] != NULL)
tcnt++;
}
***************
*** 407,427 ****
elog(ERROR, "too few entries in indexprs list");
indexkey = (Node *) lfirst(indexpr_item);
indexpr_item = lnext(indexpr_item);
-
- /*
- * Can't analyze if the opclass uses a storage type
- * different from the expression result type. We'd get
- * confused because the type shown in pg_attribute for
- * the index column doesn't match what we are getting
- * from the expression. Perhaps this can be fixed
- * someday, but for now, punt.
- */
- if (exprType(indexkey) !=
- Irel[ind]->rd_att->attrs[i]->atttypid)
- continue;
-
thisdata->vacattrstats[tcnt] =
! examine_attribute(Irel[ind], i + 1);
if (thisdata->vacattrstats[tcnt] != NULL)
{
tcnt++;
--- 408,415 ----
elog(ERROR, "too few entries in indexprs list");
indexkey = (Node *) lfirst(indexpr_item);
indexpr_item = lnext(indexpr_item);
thisdata->vacattrstats[tcnt] =
! examine_attribute(Irel[ind], i + 1, indexkey);
if (thisdata->vacattrstats[tcnt] != NULL)
{
tcnt++;
***************
*** 802,810 ****
*
* Determine whether the column is analyzable; if so, create and initialize
* a VacAttrStats struct for it. If not, return NULL.
*/
static VacAttrStats *
! examine_attribute(Relation onerel, int attnum)
{
Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
HeapTuple typtuple;
--- 790,801 ----
*
* Determine whether the column is analyzable; if so, create and initialize
* a VacAttrStats struct for it. If not, return NULL.
+ *
+ * If index_expr isn't NULL, then we're trying to analyze an expression index,
+ * and index_expr is the expression tree representing the column's data.
*/
static VacAttrStats *
! examine_attribute(Relation onerel, int attnum, Node *index_expr)
{
Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
HeapTuple typtuple;
***************
*** 827,835 ****
stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
! typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(attr->atttypid));
if (!HeapTupleIsValid(typtuple))
! elog(ERROR, "cache lookup failed for type %u", attr->atttypid);
stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
ReleaseSysCache(typtuple);
--- 818,847 ----
stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
!
! /*
! * When analyzing an expression index, believe the expression tree's type
! * not the column datatype --- the latter might be the opckeytype storage
! * type of the opclass, which is not interesting for our purposes. (Note:
! * if we did anything with non-expression index columns, we'd need to
! * figure out where to get the correct type info from, but for now that's
! * not a problem.) It's not clear whether anyone will care about the
! * typmod, but we store that too just in case.
! */
! if (index_expr)
! {
! stats->attrtypid = exprType(index_expr);
! stats->attrtypmod = exprTypmod(index_expr);
! }
! else
! {
! stats->attrtypid = attr->atttypid;
! stats->attrtypmod = attr->atttypmod;
! }
!
! typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(stats->attrtypid));
if (!HeapTupleIsValid(typtuple))
! elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
ReleaseSysCache(typtuple);
***************
*** 838,849 ****
/*
* The fields describing the stats->stavalues[n] element types default to
! * the type of the field being analyzed, but the type-specific typanalyze
* function can change them if it wants to store something else.
*/
for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
{
! stats->statypid[i] = stats->attr->atttypid;
stats->statyplen[i] = stats->attrtype->typlen;
stats->statypbyval[i] = stats->attrtype->typbyval;
stats->statypalign[i] = stats->attrtype->typalign;
--- 850,861 ----
/*
* The fields describing the stats->stavalues[n] element types default to
! * the type of the data being analyzed, but the type-specific typanalyze
* function can change them if it wants to store something else.
*/
for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
{
! stats->statypid[i] = stats->attrtypid;
stats->statyplen[i] = stats->attrtype->typlen;
stats->statypbyval[i] = stats->attrtype->typbyval;
stats->statypalign[i] = stats->attrtype->typalign;
***************
*** 1780,1786 ****
attr->attstattarget = default_statistics_target;
/* Look for default "<" and "=" operators for column's type */
! get_sort_group_operators(attr->atttypid,
false, false, false,
<opr, &eqopr, NULL);
--- 1792,1798 ----
attr->attstattarget = default_statistics_target;
/* Look for default "<" and "=" operators for column's type */
! get_sort_group_operators(stats->attrtypid,
false, false, false,
<opr, &eqopr, NULL);
***************
*** 1860,1869 ****
int nonnull_cnt = 0;
int toowide_cnt = 0;
double total_width = 0;
! bool is_varlena = (!stats->attr->attbyval &&
! stats->attr->attlen == -1);
! bool is_varwidth = (!stats->attr->attbyval &&
! stats->attr->attlen < 0);
FmgrInfo f_cmpeq;
typedef struct
{
--- 1872,1881 ----
int nonnull_cnt = 0;
int toowide_cnt = 0;
double total_width = 0;
! bool is_varlena = (!stats->attrtype->typbyval &&
! stats->attrtype->typlen == -1);
! bool is_varwidth = (!stats->attrtype->typbyval &&
! stats->attrtype->typlen < 0);
FmgrInfo f_cmpeq;
typedef struct
{
***************
*** 2126,2133 ****
for (i = 0; i < num_mcv; i++)
{
mcv_values[i] = datumCopy(track[i].value,
! stats->attr->attbyval,
! stats->attr->attlen);
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
}
MemoryContextSwitchTo(old_context);
--- 2138,2145 ----
for (i = 0; i < num_mcv; i++)
{
mcv_values[i] = datumCopy(track[i].value,
! stats->attrtype->typbyval,
! stats->attrtype->typlen);
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
}
MemoryContextSwitchTo(old_context);
***************
*** 2184,2193 ****
int nonnull_cnt = 0;
int toowide_cnt = 0;
double total_width = 0;
! bool is_varlena = (!stats->attr->attbyval &&
! stats->attr->attlen == -1);
! bool is_varwidth = (!stats->attr->attbyval &&
! stats->attr->attlen < 0);
double corr_xysum;
Oid cmpFn;
int cmpFlags;
--- 2196,2205 ----
int nonnull_cnt = 0;
int toowide_cnt = 0;
double total_width = 0;
! bool is_varlena = (!stats->attrtype->typbyval &&
! stats->attrtype->typlen == -1);
! bool is_varwidth = (!stats->attrtype->typbyval &&
! stats->attrtype->typlen < 0);
double corr_xysum;
Oid cmpFn;
int cmpFlags;
***************
*** 2476,2483 ****
for (i = 0; i < num_mcv; i++)
{
mcv_values[i] = datumCopy(values[track[i].first].value,
! stats->attr->attbyval,
! stats->attr->attlen);
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
}
MemoryContextSwitchTo(old_context);
--- 2488,2495 ----
for (i = 0; i < num_mcv; i++)
{
mcv_values[i] = datumCopy(values[track[i].first].value,
! stats->attrtype->typbyval,
! stats->attrtype->typlen);
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
}
MemoryContextSwitchTo(old_context);
***************
*** 2583,2590 ****
for (i = 0; i < num_hist; i++)
{
hist_values[i] = datumCopy(values[pos].value,
! stats->attr->attbyval,
! stats->attr->attlen);
pos += delta;
posfrac += deltafrac;
if (posfrac >= (num_hist - 1))
--- 2595,2602 ----
for (i = 0; i < num_hist; i++)
{
hist_values[i] = datumCopy(values[pos].value,
! stats->attrtype->typbyval,
! stats->attrtype->typlen);
pos += delta;
posfrac += deltafrac;
if (posfrac >= (num_hist - 1))
Index: src/include/commands/vacuum.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/commands/vacuum.h,v
retrieving revision 1.89
diff -c -r1.89 vacuum.h
*** src/include/commands/vacuum.h 9 Feb 2010 21:43:30 -0000 1.89
--- src/include/commands/vacuum.h 1 Aug 2010 19:56:12 -0000
***************
*** 62,70 ****
/*
* These fields are set up by the main ANALYZE code before invoking the
* type-specific typanalyze function.
*/
Form_pg_attribute attr; /* copy of pg_attribute row for column */
! Form_pg_type attrtype; /* copy of pg_type row for column */
MemoryContext anl_context; /* where to save long-lived data */
/*
--- 62,78 ----
/*
* These fields are set up by the main ANALYZE code before invoking the
* type-specific typanalyze function.
+ *
+ * Note: do not assume that the data being analyzed has the same datatype
+ * shown in attr, ie do not trust attr->atttypid, attlen, etc. This is
+ * because some index opclasses store a different type than the underlying
+ * column/expression. Instead use attrtypid, attrtypmod, and attrtype for
+ * information about the datatype being fed to the typanalyze function.
*/
Form_pg_attribute attr; /* copy of pg_attribute row for column */
! Oid attrtypid; /* type of data being analyzed */
! int32 attrtypmod; /* typmod of data being analyzed */
! Form_pg_type attrtype; /* copy of pg_type row for attrtypid */
MemoryContext anl_context; /* where to save long-lived data */
/*
***************
*** 95,104 ****
/*
* These fields describe the stavalues[n] element types. They will be
! * initialized to be the same as the column's that's underlying the slot,
! * but a custom typanalyze function might want to store an array of
! * something other than the analyzed column's elements. It should then
! * overwrite these fields.
*/
Oid statypid[STATISTIC_NUM_SLOTS];
int2 statyplen[STATISTIC_NUM_SLOTS];
--- 103,111 ----
/*
* These fields describe the stavalues[n] element types. They will be
! * initialized to match attrtypid, but a custom typanalyze function might
! * want to store an array of something other than the analyzed column's
! * elements. It should then overwrite these fields.
*/
Oid statypid[STATISTIC_NUM_SLOTS];
int2 statyplen[STATISTIC_NUM_SLOTS];