DevoxxFR 2024 Reproducible Builds with Apache Maven
Pytables
1. HDF5
Hierarchical Data Format
Thursday, January 5, 2012
2. /the/object/tree
• Datasets, Leaf
• Tables, records with fixed-length fields
• Arrays: matrices whose elements all share a single type
• VLArray, EArray, Array
• Groups
• May contain groups and datasets
Thursday, January 5, 2012
3. from tables import *
# Define a user record to characterize some kind of particles
class Particle(IsDescription):
name = StringCol(16) # 16-character String
idnumber = Int64Col() # Signed 64-bit integer
ADCcount = UInt16Col() # Unsigned short integer
TDCcount = UInt8Col() # unsigned byte
grid_i = Int32Col() # integer
grid_j = Int32Col() # integer
pressure = Float32Col() # float (single-precision)
energy = FloatCol() # double (double-precision)
filename = "test.h5"
# Open a file in "w"rite mode
h5file = openFile(filename, mode = "w", title = "Test file")
# Create a new group under "/" (root)
group = h5file.createGroup("/", 'detector', 'Detector information')
# Create one table on it
table = h5file.createTable(group, 'readout', Particle, "Readout example")
# Fill the table with 10 particles
particle = table.row
for i in xrange(10):
particle['name'] = 'Particle: %6d' % (i)
particle['TDCcount'] = i % 256
particle['ADCcount'] = (i * 256) % (1 << 16)
particle['grid_i'] = i
particle['grid_j'] = 10 - i
particle['pressure'] = float(i*i)
particle['energy'] = float(particle['pressure'] ** 4)
particle['idnumber'] = i * (2 ** 34)
# Insert a new particle record
particle.append()
# Close (and flush) the file
h5file.close()
Thursday, January 5, 2012
9. (C)Arrays
import numpy
import tables
fileName = 'carray1.h5'
shape = (200, 300)
atom = tables.UInt8Atom()
filters = tables.Filters(complevel=5, complib='zlib')
h5f = tables.openFile(fileName, 'w')
ca = h5f.createCArray(h5f.root, 'carray', atom, shape, filters=filters)
# Fill a hyperslab in ``ca``.
ca[10:60, 20:70] = numpy.ones((50, 50))
h5f.close()
# Re-open and read another hyperslab
h5f = tables.openFile(fileName)
print h5f
print h5f.root.carray[8:12, 18:22]
h5f.close()
Thursday, January 5, 2012
10. (E)Arrays
import tables
import numpy
fileh = tables.openFile('earray1.h5', mode='w')
a = tables.StringAtom(itemsize=8)
# Use ''a'' as the object type for the enlargeable array.
array_c = fileh.createEArray(fileh.root, 'array_c', a, (0,), "Chars")
array_c.append(numpy.array(['a'*2, 'b'*4], dtype='S8'))
array_c.append(numpy.array(['a'*6, 'b'*8, 'c'*10], dtype='S8'))
# Read the string ''EArray'' we have created on disk.
for s in array_c:
print 'array_c[%s] => %r' % (array_c.nrow, s)
# Close the file.
fileh.close()
Thursday, January 5, 2012
def _get_pgroup(self, file, p, proj=None):
    """
    Return the group node of ``file`` holding records of property ``p``.

    The group — together with its ``data`` and ``step`` EArrays — is
    created on first use.

    :param tables.File file: Handle to HDF5 file to which records are saved.
    :param string p: To be recorded property.
    :param Projection proj: Projection from which property p is recorded;
        when omitted, the sheet itself is the record source.
    :return: Group node corresponding to property p.
    """
    shapes = self.sim.config.ShapeDispatch
    source = self.sheet.name if not proj else proj.name
    try:
        pgroup = file.getNode('/%s_%s' % (p, source))
    except NoSuchNodeError:
        # First record of this property: create the group plus one
        # enlargeable array for the data and one for the step counter.
        pgroup = file.createGroup('/', '%s_%s' % (p, source))
        file.createEArray(pgroup, 'data', Float64Atom(),
                          flatten((0, shapes[p])))
        file.createEArray(pgroup, 'step', Int32Atom(), (0, 1))
    return pgroup
def _write_attr(self, pgroup, data):
"""
Helper fn writing provided data and step count to group node (of
tables.File)
:param tables.group.Group pgroup: Group node to which data is saved.
:param numpy.Array data: Data matrix to be recorded.
"""
pgroup.data.append([data])
pgroup.step.append([[self.count]])
Thursday, January 5, 2012
def function(self):
    """
    Load per-node activity submatrices from the recordings file into a 3D
    array and return it reshaped as a single 2D matrix (nodes side by side).
    """
    x, y = self.x, self.y
    size = self.size
    nnames = self.nnames
    stacked = np.zeros((len(nnames), size, size))
    with openFile(self.path, 'r') as h5:
        for i, nname in enumerate(nnames):
            node = h5.getNode(nname)
            # Take the (size x size) window at offset (x, y) from row
            # ``self.cnt`` of this node's data array.
            stacked[i, :, :] = \
                node.data.read(self.cnt)[0, x:x + size, y:y + size]
    return stacked.reshape(size, size * len(nnames))
Thursday, January 5, 2012
14. Useful Programs
• HDFView or ViTables
• h5dump
• hdf5read, hdf5info (MATLAB)
Thursday, January 5, 2012