[dataview] fix chunking and loading for n-d data

This commit is contained in:
Jan Grewe 2021-01-18 09:55:47 +01:00
parent ecb20e5ff8
commit 147ebd64ca
4 changed files with 40 additions and 28 deletions

View File

@ -10,7 +10,7 @@ settings_recent_files_key = "/".join([organization, application, "recent_files"]
settings_recent_file_max_count_key = "/".join([organization, application, "recent_files_max_count"]) settings_recent_file_max_count_key = "/".join([organization, application, "recent_files_max_count"])
settings_recent_file_max_count = 10 settings_recent_file_max_count = 10
io_chunksize = 10000000 max_chunksize = 1000000000
PACKAGE_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) PACKAGE_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
ICONS_FOLDER = os.path.join(PACKAGE_ROOT, "icons") ICONS_FOLDER = os.path.join(PACKAGE_ROOT, "icons")

View File

@ -347,7 +347,7 @@ class PlotScreen(QWidget):
try: try:
self._data_view = DataView(item, self._file_handler) self._data_view = DataView(item, self._file_handler)
except ValueError as e: except ValueError as e:
print(e) print("error in plotscreen.plot", e)
return return
self._data_view.request_more() # TODO this is just a test, needs to be removed self._data_view.request_more() # TODO this is just a test, needs to be removed
print(self._data_view) print(self._data_view)

View File

@ -1,5 +1,5 @@
import numpy as np import numpy as np
from nixview.constants import io_chunksize as chunksize from nixview.constants import max_chunksize as chunksize
class DataView(): class DataView():
@ -10,8 +10,9 @@ class DataView():
self._full_shape = item_descriptor.shape self._full_shape = item_descriptor.shape
self._buffer = None self._buffer = None
self._offset = np.zeros(len(self._full_shape), dtype=int) self._offset = np.zeros(len(self._full_shape), dtype=int)
self._fetched_data = np.zeros(len(self._full_shape), dtype=int)
self._count = None self._count = None
self._max_dim = None self._cut_dim = None
self.init_buffer() self.init_buffer()
self.request_more() self.request_more()
@ -24,36 +25,46 @@ class DataView():
sl = tuple([slice(o, o + c) for o, c in zip(self._offset, valid_count)]) sl = tuple([slice(o, o + c) for o, c in zip(self._offset, valid_count)])
self._buffer[sl] = self._file_handler.request_data(self._item_descriptor, self._offset, self._buffer[sl] = self._file_handler.request_data(self._item_descriptor, self._offset,
valid_count) valid_count)
self._offset = tuple([sum(x) for x in zip(self._offset, self._count)]) new_ofst = np.zeros_like(self._offset)
for i, x in enumerate(zip(self._offset, valid_count)):
if i == self._cut_dim:
new_ofst[i] = sum(x)
self._offset = tuple(new_ofst)
self._fetched_data = tuple([sum(x) for x in zip(self._fetched_data, self._count)])
#if data is not None and self._buffer is None: def init_chunking(self):
# self._buffer = data """decides on the chunks size for reading. Heuristic is based on the dimensionality of the data and the "best xdim" if available.
# self._offset = data.shape If data is 2D the best xdim is loaded in chunks (if necessary) while the other is fully loaded. For 3D and more it is the last dimension that is cut. If the number of data points in the first n-1 dimensions exceeds the maximum chunksize (settings) an error will be thrown.
#else: """
# from IPython import embed max_element_count = chunksize
# embed() if self._item_descriptor.best_xdim is not None:
cut_dim = self._item_descriptor.best_xdim
else:
cut_dim = len(self._full_shape) - 1
if np.prod(self._full_shape[:-1]) > chunksize:
raise ValueError("Cannot load data in chunks! maxchunksize too small: product of elements in first %i dimensions exceeds max chunksize! (%i > %i)" % (len(self._full_shape) -1, np.prod(self._full_shape[:-1]), chunksize))
chunk_shape = np.zeros(len(self._full_shape), dtype=int)
for i, d in enumerate(self._full_shape):
if i != cut_dim:
chunk_shape[i] = d
max_element_count /= d
chunk_shape[cut_dim] = max_element_count
self._cut_dim = cut_dim
self._count = chunk_shape
def init_buffer(self): def init_buffer(self):
buffer_shape = np.zeros(len(self._full_shape), dtype=int) self.init_chunking()
max_dim_count = chunksize
max_dim = np.argmax(self._full_shape)
for i, d in enumerate(self._full_shape):
if i != max_dim:
buffer_shape[i] = self._full_shape[i]
max_dim_count /= self._full_shape[i]
buffer_shape[max_dim] = max_dim_count
self._count = buffer_shape
self._max_dim = max_dim
try: try:
self._buffer = np.empty(self._full_shape) self._buffer = np.empty(self._full_shape)
except: except:
raise ValueError("Cannot handle so many data points!") #FIXME raise ValueError("Error reserving buffer! Cannot handle so many data points!") #FIXME
print("init buffer")
@property @property
def fully_loaded(self): def fully_loaded(self):
return self._buffer is not None and self._full_shape == self._offset return np.all(self._buffer is not None and self._fetched_data == self._full_shape)
def __str__(self) -> str: def __str__(self) -> str:
r = self._item_descriptor.name + " " + str(self._item_descriptor.entity_type) r = self._item_descriptor.name + " " + str(self._item_descriptor.entity_type)

View File

@ -1,4 +1,3 @@
from nixview.file_utils import suggested_plotter
import os import os
import nixio as nix import nixio as nix
import numpy as np import numpy as np
@ -103,6 +102,8 @@ class FileHandler(metaclass=Singleton):
for i, (o, c) in enumerate(zip(offset, count)): for i, (o, c) in enumerate(zip(offset, count)):
if o + c > shape[i]: if o + c > shape[i]:
valid_count[i] = shape[i] - o valid_count[i] = shape[i] - o
else:
valid_count[i] = c
return valid_count return valid_count
def count_is_valid(self, shape, offset, count): def count_is_valid(self, shape, offset, count):