Re: (Updated) Table File Format - Mailing list pgsql-hackers
From | Hannu Krosing |
---|---|
Subject | Re: (Updated) Table File Format |
Date | |
Msg-id | 3A38C4BA.97812EDF@tm.ee Whole thread Raw |
In response to | (Updated) Table File Format ("Michael Richards" <miker@interchange.ca>) |
List | pgsql-hackers |
Michael Richards wrote: > > I need a little help on the format of the postgres tables. > > I've got this wonderfully corrupted database where just about everything is > fubar. I've tried a number of things to get it back using postgres and > related tools with no success. It looks like most of the data is there, but > there may be a small amount of corruption that's causing all kinds of > problems. Find attached a python script that I used to get deleted (actually all ;) records from a database table. It was not corrupted, just a simple programming error in client software had deleted more than needed. Fortunately it was not vacuumed so the data itself (info for web-based paper postcard sending system) was there It works as-is only for my table as the field extraction code is hard-coded, but it should be quite easy to modify for your needs It worked 1 year ago probably on 6.4.x . I hope that the structure had not changed since. sendcard.py is the actual script used, pgtabdump.py is a somewhat cleaned-up version --------------- Hannu#!/usr/bin/python import sys,os,struct,string site_base = "/var/lib/pgsql/base" db_name = "betest" table_name = "test" db_path = os.path.join(site_base,db_name) table_path = os.path.join(db_path,table_name) page_size = 8*1024 def strbits(s,len): bits = [] while s: c = s[0] s = s[1:] b = struct.unpack('B',c)[0] for i inrange(8): if b & (1<<i): bits.append(1) else: bits.append(0) return string.join(map(str,bits),'')[:len] class table_page: "class to represent a database table page" def __init__(self,fd,page_nr): fd.seek(page_nr*page_size) self.rawdata = fd.read(page_size) self.lower,\ self.upper,\ self.special,\ self.opaque = struct.unpack('HHHH',self.rawdata[:8]) self.item_pointers=[] self.items=[] for i in range(8,self.lower,4): rawItemIdData = self.rawdata[i:i+4] ItemIdData_I32= struct.unpack('L',rawItemIdData)[0] if not ItemIdData_I32: break lp_len = int(ItemIdData_I32>> 17) lp_flags = int((ItemIdData_I32 >> 15) & 3) lp_off = int(ItemIdData_I32 & 0x7fff) self.item_pointers.append(hex(ItemIdData_I32),lp_off,lp_flags,lp_len) rawItemData = self.rawdata[lp_off:lp_off+lp_len] t_oid = struct.unpack('L', rawItemData[ 0: 4])[0] t_001 = struct.unpack('L',rawItemData[ 4: 8])[0] t_002 = struct.unpack('L', rawItemData[ 8:12])[0] t_xmin =struct.unpack('L', rawItemData[12:16])[0] t_xmax = struct.unpack('L', rawItemData[16:20])[0] t_ctid= struct.unpack('LH', rawItemData[20:26]) t_fcnt = struct.unpack('H', rawItemData[26:28])[0] t_xxx = struct.unpack('2B', rawItemData[28:30]) t_doff = struct.unpack('B', rawItemData[30:31])[0] t_mask = strbits(rawItemData[31:t_doff],t_fcnt) # t_mask = rawItemData[31:t_doff] t_data = rawItemData[t_doff:] self.items.append(t_oid,t_001,t_002,t_xmin,t_xmax,t_ctid,t_fcnt,t_xxx,t_doff,t_mask,t_data) def __str__(self): strval= [str((self.lower, self.upper, self.special, self.opaque))] strval.append(string.join(map(str,self.item_pointers),'\n')) strval.append(string.join(map(repr,self.items),'\n')) return string.join(strval,'\n------------------\n') if __name__=="__main__": print '# dumping %s' % table_path fd = open(table_name) page = table_page(fd,0) print page #!/usr/bin/python import sys,os,struct,string table_name = "sendcard" page_size = 8*1024 def strbits(s,len): bits = [] while s: c = s[0] s = s[1:] b = struct.unpack('B',c)[0] for i inrange(8): if b & (1<<i): bits.append(1) else: bits.append(0) bits = string.join(map(str,bits[:len]),'') return bits """ rapos=> \d sendcard Table = sendcard +----------------------------------+----------------------------------+-------+ | Field | Type | Length| +----------------------------------+----------------------------------+-------+ | card_id | int4 default nextval ( 'card_id_ | 4 | | card_name | text | var | | recipient_name | text | var | | recipient_street_name | text | var | | recipient_house_nr | text | var | | recipient_apartment_nr | text | var | | recipient_city | text | var | | recipient_village | text | var | | recipient_state | text | var | | recipient_zip_code | text | var | | sender_name | text | var | | sender_email | text | var | | message | text | var | | bank_ref_id | text | var | | delivery_nr | text | var | | delivery_route | text | var | | sender_remote_addr | text | var | | card_cost | float8 | 8 | | cookie_id | int4 default nextval ( 'cookie_i | 4 | | is_payed | text | var | | printing_date | datetime | 8 | | delivery_date | date | 4 | | payment_date | date | 4 | | entered_at | timestamp default now ( ) | 4 | | send_bill_to | text | var | +----------------------------------+----------------------------------+-------+ """ create_statement = """\ create table savecard( u_oid int, u_xmin int, u_xmax int, card_id int4, card_name text, recipient_name text, recipient_street_name text, recipient_house_nr text, recipient_apartment_nr text, recipient_city text, recipient_village text, recipient_state text, recipient_zip_code text, sender_name text, sender_email text, message text, bank_ref_id text, delivery_nr text, delivery_route text, sender_remote_addr text ); copy savecard from stdin; """ def bin2text(buf,offset): offset = ((offset + 3)/4)*4 len = int(struct.unpack('L',buf[offset:offset+4])[0]) str = buf[offset+4:offset+len] newoff = offset+len return str,newoff class table_page: "class to represent a database table page" def __init__(self,fd,page_nr): fd.seek(page_nr*page_size) self.rawdata = fd.read(page_size) self.lower,\ self.upper,\ self.special,\ self.opaque = struct.unpack('HHHH',self.rawdata[:8]) self.item_pointers=[] self.items=[] for i in range(8,self.lower,4): rawItemIdData = self.rawdata[i:i+4] ItemIdData_I32= struct.unpack('L',rawItemIdData)[0] if not ItemIdData_I32: break lp_len = int(ItemIdData_I32>> 17) lp_flags = int((ItemIdData_I32 >> 15) & 3) lp_off = int(ItemIdData_I32 & 0x7fff) self.item_pointers.append(hex(ItemIdData_I32),lp_off,lp_flags,lp_len) rawItemData = self.rawdata[lp_off:lp_off+lp_len] t_oid = struct.unpack('L', rawItemData[ 0: 4])[0] t_001 = struct.unpack('L',rawItemData[ 4: 8])[0] t_002 = struct.unpack('L', rawItemData[ 8:12])[0] t_xmin =struct.unpack('L', rawItemData[12:16])[0] t_xmax = struct.unpack('L', rawItemData[16:20])[0] t_ctid= struct.unpack('LH', rawItemData[20:26]) t_fcnt = struct.unpack('H', rawItemData[26:28])[0] t_xxx = struct.unpack('2B', rawItemData[28:30]) t_doff = struct.unpack('B', rawItemData[30:31])[0] t_mask = strbits(rawItemData[31:t_doff],t_fcnt) t_data = rawItemData[t_doff:] id = int(struct.unpack('L',t_data[:4])[0]) values = [int(t_oid),int(t_xmin),int(t_xmax),id] texts = [] offset = 4 for c in t_mask[1:17]: if not int(c): values.append('') continue text,offset = bin2text(t_data,offset) values.append(repr(text)) values=values+texts self.items.append(values) # self.items.append(t_oid,t_001,t_002,t_xmin,t_xmax,t_ctid,t_fcnt,t_xxx,t_doff,t_mask,t_data) def __str__(self): # strval = [str((self.lower, self.upper, self.special, self.opaque))] # strval.append(string.join(map(str,self.item_pointers),'\n')) # strval.append(string.join(map(repr,self.items),'\n')) # return string.join(strval,'\n------------------\n') rows = [] for record in self.items: rows.append(string.join(map(str,record),'\t')) return string.join(rows,'\n') if __name__=="__main__": # print '# dumping %s' % table_name print create_statement fd = open(table_name) page_nr = 0 while 1: try: page = table_page(fd,page_nr) print page except: break page_nr = page_nr+ 1 print '\\.\n'
pgsql-hackers by date: