Re: Database storage bloat - Mailing list pgsql-admin

From Tony and Bryn Reina
Subject Re: Database storage bloat
Date
Msg-id BAY8-DAV37L5QALrHx00001e801@hotmail.com
Whole thread Raw
In response to Database storage bloat  (reina_ga@hotmail.com (Tony Reina))
Responses Re: Database storage bloat
List pgsql-admin
I'm using 'text' instead of char. That seemed to cut the bloat down by about
30%. There's only about 11 tables in the DB. I included them at the bottom
in case you're interested.

Perhaps there's just something fundamentally boneheaded about my design
(re-reading "Database Design for Mere Mortals" as I write).

-Tony

CREATE SEQUENCE FileIndicies_fileID_seq;
CREATE TABLE FileIndicies (
 fileID      integer DEFAULT nextval('FileIndicies_fileID_seq') UNIQUE NOT
NULL,  --PK
 szFileName              text NOT NULL,
 szFileType  text,
 CreationDate  date,
 CreationTime  time,
 dtimestampresolution float4,
 dtimespan  float4,
 szAppName  text,
 szFileComment  text,
 PRIMARY KEY (fileID),
 UNIQUE (szFileName, szFileType, CreationDate, CreationTime)
);

CREATE SEQUENCE EntityFile_dbIndex_seq;
CREATE TABLE EntityFile (
 EntityID  integer, --PK
 fileID                  integer REFERENCES FileIndicies (fileID),
 dbIndex   integer DEFAULT nextval('EntityFile_dbIndex_seq') UNIQUE NOT
NULL,
 PRIMARY KEY (fileID, EntityID)
);

-- AnalogIndex
CREATE TABLE AnalogIndicies (
 dbIndex   integer REFERENCES EntityFile (dbIndex),
 dsamplerate  float4 CHECK (dsamplerate > (0)::float4),
 dminval   float4,
 dmaxval   float4 CHECK (dmaxval >= dminval),
 szunits   text,
 dresolution  float4 CHECK (dresolution > (0)::float4),
 dlocationx  float4,
 dlocationy  float4,
 dlocationz  float4,
 dlocationuser  float4,
 dhighfreqcorner  float4 CHECK (dhighfreqcorner >= (0)::float4),
 dwhighfreqorder  float4 CHECK (dwhighfreqorder >= (0)::float4),
 szhighfiltertype text,
 dlowfreqcorner  float4 CHECK (dlowfreqcorner >= (0)::float4),
 dwlowfreqorder  float4 CHECK (dwlowfreqorder >= (0)::float4),
 szlowfiltertype  float4,
 szprobeinfo  text,
 PRIMARY KEY (dbIndex)
);

CREATE TABLE AnalogValues (
 dbIndex                 integer REFERENCES EntityFile (dbIndex),
 dtimestamp  float4 NOT NULL,
 dvalue   float4,
 PRIMARY KEY (dbIndex, dtimestamp)
);

CREATE TABLE EventIndicies (
 dbIndex                 integer REFERENCES EntityFile (dbIndex),
 dweventtype  smallint CHECK (dweventtype >=0 AND dweventtype <= 4),
 dwmindatalength  smallint CHECK (dwmindatalength > (0)::float4),
 dwmaxdatalength  smallint CHECK (dwmaxdatalength >= dwmindatalength),
 szcsvdesc  text,
 PRIMARY KEY (dbIndex)
);

CREATE TABLE EventValues (
 dbIndex                 integer REFERENCES EntityFile (dbIndex),
 dtimestamp  float4,
 dwdatabytesize  smallint CHECK (dwdatabytesize > (0)::float4),
 eventvalue  text,
 PRIMARY KEY (dbIndex, dtimestamp)
);

CREATE TABLE NeuralIndicies (
 dbIndex                 integer REFERENCES EntityFile (dbIndex),
 dwsourceentityid smallint,
 dwsourceunitid  smallint,
 szprobeinfo  text,
 PRIMARY KEY (dbIndex)
);

CREATE TABLE NeuralValues (
 dbIndex                 integer REFERENCES EntityFile (dbIndex),
 dtimestamp  float4 NOT NULL,
 PRIMARY KEY (dbIndex, dtimestamp)
);

CREATE TABLE SegmentIndicies (
 dbIndex                 integer REFERENCES EntityFile (dbIndex),
 dwsourcecount  smallint CHECK (dwsourcecount > (0)::float4),
 dwminsamplecount smallint CHECK (dwminsamplecount > (0)::float4),
 dwmaxsamplecount smallint CHECK (dwmaxsamplecount > (0)::float4),
 dsamplerate  float4 CHECK (dsamplerate > (0)::float4),
 szunits   text,
 PRIMARY KEY (dbIndex)
);

CREATE TABLE SegmentSourceIndicies (
 dbIndex                 integer REFERENCES EntityFile (dbIndex),
 dwsourceid              integer CHECK (dwsourceid >= 0),
 dminval   float4,
 dmaxval   float4 CHECK (dmaxval >= dminval),
 dresolution  float4 CHECK (dresolution > (0)::float4),
 dsubsampleshift  float4,
 dlocationx  float4,
 dlocationy  float4,
 dlocationz  float4,
 dlocationuser  float4,
 dhighfreqcorner  float4 CHECK (dhighfreqcorner >= (0)::float4),
 dwhighfreqorder  float4 CHECK (dwhighfreqorder >= (0)::float4),
 szhighfiltertype text,
 dlowfreqcorner  float4 CHECK (dlowfreqcorner >= (0)::float4),
 dwlowfreqorder  float4 CHECK (dwlowfreqorder >= (0)::float4),
 szlowfiltertype  text,
 szprobeinfo  text,
 PRIMARY KEY (dbIndex, dwsourceid)
);

CREATE TABLE SegmentValues (
 dbIndex                 integer REFERENCES EntityFile (dbIndex),
 dwunitid  smallint,
 dwsampleindex  smallint,
 dtimestamp  float4,
 dvalue   float4,
 PRIMARY KEY (dbIndex, dtimestamp, dwsampleindex, dwunitid)
);

CREATE VIEW SegmentData AS
 SELECT a.szFileName, a.szFileType, a.creationdate, a.creationtime,
 c.EntityID,
 b.dwunitid, b.dwsampleindex, b.dtimestamp, b.dvalue
 FROM FileIndicies AS a, SegmentValues AS b, EntityFile AS c
 WHERE
 b.dbIndex = c.dbIndex AND
 a.fileID = c.fileID
 ORDER BY a.szFileName, a.szFileType, a.creationdate, a.creationtime,
c.EntityID,
 b.dwunitid, b.dwsampleindex, b.dtimestamp;




----- Original Message -----
From: "Douglas Trainor" <trainor@uic.edu>
To: "Tony Reina" <reina_ga@hotmail.com>
Cc: <pgsql-admin@postgresql.org>
Sent: Thursday, April 08, 2004 1:41 PM
Subject: Re: [ADMIN] Database storage bloat


> Saying "we've set field sizes to their theoretical skinniness" makes me
> think that
> you may have the wrong data types.  For example, you may have used CHAR
> and not VARCHAR.
>
>     douglas
>
> Tony Reina wrote:
>
> >I'm developing a database for scientific recordings. These recordings
> >are traditionally saved as binary flat files for simplicity and
> >compact storage. Although I think ultimately having a database is
> >better than 1,000s of flat files in terms of data access, I've found
> >that the database (or at least my design) is pretty wasteful on
> >storage space compared with the binary flat files.
> >
> >In particular, I tried importing all of the data from a binary flat
> >file that is 1.35 MB into a PostgreSQL database (a very small test
> >file; average production file is probably more like 100 MB). The
> >database directory ballooned from 4.1 MB to 92 MB (a bloat of 65X the
> >original storage of the binary flat file).
> >
> >Now I know that table design and normalizing is important. As far as
> >my partner and I can tell, we've made good use of normalizing (no
> >redundancy), we've set field sizes to their theoretical skinniness,
> >[...]
> >
> >
>
>

pgsql-admin by date:

Previous
From: "Tony and Bryn Reina"
Date:
Subject: Re: Database storage bloat
Next
From: "Tom Bakken"
Date:
Subject: Re: Out of space