DevoxxFR 2024 Reproducible Builds with Apache Maven
Pytables
1. HDF5
Hierarchical Data Format
Thursday, January 5, 2012
2. /the/object/tree
• Datasets, Leaf
• Tables, records with fixed-length fields
• Arrays: matrices whose elements all share a single type
• VLArray, EArray, Array
• Groups
• May contain groups and datasets
Thursday, January 5, 2012
3. from tables import *
# Define a user record to characterize some kind of particles
class Particle(IsDescription):
name = StringCol(16) # 16-character String
idnumber = Int64Col() # Signed 64-bit integer
ADCcount = UInt16Col() # Unsigned short integer
TDCcount = UInt8Col() # unsigned byte
grid_i = Int32Col() # integer
grid_j = Int32Col() # integer
pressure = Float32Col() # float (single-precision)
energy = FloatCol() # double (double-precision)
filename = "test.h5"
# Open a file in "w"rite mode
h5file = openFile(filename, mode = "w", title = "Test file")
# Create a new group under "/" (root)
group = h5file.createGroup("/", 'detector', 'Detector information')
# Create one table on it
table = h5file.createTable(group, 'readout', Particle, "Readout example")
# Fill the table with 10 particles
particle = table.row
for i in xrange(10):
particle['name'] = 'Particle: %6d' % (i)
particle['TDCcount'] = i % 256
particle['ADCcount'] = (i * 256) % (1 << 16)
particle['grid_i'] = i
particle['grid_j'] = 10 - i
particle['pressure'] = float(i*i)
particle['energy'] = float(particle['pressure'] ** 4)
particle['idnumber'] = i * (2 ** 34)
# Insert a new particle record
particle.append()
# Close (and flush) the file
h5file.close()
Thursday, January 5, 2012
9. (C)Arrays
import numpy
import tables
fileName = 'carray1.h5'
shape = (200, 300)
atom = tables.UInt8Atom()
filters = tables.Filters(complevel=5, complib='zlib')
h5f = tables.openFile(fileName, 'w')
ca = h5f.createCArray(h5f.root, 'carray', atom, shape, filters=filters)
# Fill a hyperslab in ``ca``.
ca[10:60, 20:70] = numpy.ones((50, 50))
h5f.close()
# Re-open and read another hyperslab
h5f = tables.openFile(fileName)
print h5f
print h5f.root.carray[8:12, 18:22]
h5f.close()
Thursday, January 5, 2012
10. (E)Arrays
import tables
import numpy
fileh = tables.openFile('earray1.h5', mode='w')
a = tables.StringAtom(itemsize=8)
# Use ''a'' as the object type for the enlargeable array.
array_c = fileh.createEArray(fileh.root, 'array_c', a, (0,), "Chars")
array_c.append(numpy.array(['a'*2, 'b'*4], dtype='S8'))
array_c.append(numpy.array(['a'*6, 'b'*8, 'c'*10], dtype='S8'))
# Read the string ''EArray'' we have created on disk.
for s in array_c:
print 'array_c[%s] => %r' % (array_c.nrow, s)
# Close the file.
fileh.close()
Thursday, January 5, 2012
def _get_pgroup(self, file, p, proj=None):
    """
    Return the group node of ``file`` holding records of property ``p``.

    The group — together with its ``data`` and ``step`` EArrays — is
    created on first use.

    :param tables.File file: Handle to HDF5 file to which records are saved.
    :param string p: To be recorded property.
    :param Projection proj: Projection from which property p is recorded;
        when omitted, the sheet itself is the record source.
    :return: Group node corresponding to property p.
    """
    shapes = self.sim.config.ShapeDispatch
    source = self.sheet.name if not proj else proj.name
    try:
        pgroup = file.getNode('/%s_%s' % (p, source))
    except NoSuchNodeError:
        # First record of this property: create the group plus one
        # enlargeable array for the data and one for the step counter.
        pgroup = file.createGroup('/', '%s_%s' % (p, source))
        file.createEArray(pgroup, 'data', Float64Atom(),
                          flatten((0, shapes[p])))
        file.createEArray(pgroup, 'step', Int32Atom(), (0, 1))
    return pgroup
def _write_attr(self, pgroup, data):
"""
Helper fn writing provided data and step count to group node (of
tables.File)
:param tables.group.Group pgroup: Group node to which data is saved.
:param numpy.Array data: Data matrix to be recorded.
"""
pgroup.data.append([data])
pgroup.step.append([[self.count]])
Thursday, January 5, 2012
def function(self):
    """
    Load per-node activity submatrices from the recordings file into a 3D
    array and return it reshaped as a single 2D matrix (nodes side by side).
    """
    x, y = self.x, self.y
    size = self.size
    nnames = self.nnames
    stacked = np.zeros((len(nnames), size, size))
    with openFile(self.path, 'r') as h5:
        for i, nname in enumerate(nnames):
            node = h5.getNode(nname)
            # Take the (size x size) window at offset (x, y) from row
            # ``self.cnt`` of this node's data array.
            stacked[i, :, :] = \
                node.data.read(self.cnt)[0, x:x + size, y:y + size]
    return stacked.reshape(size, size * len(nnames))
Thursday, January 5, 2012
14. Useful Programs
• HDFView or ViTables
• h5dump
• hdf5read, hdf5info (MATLAB)
Thursday, January 5, 2012