[dataview] fix chunking and loading for n-d data

2021-01-18 09:55:47 +01:00 · 2021-01-18 09:55:47 +01:00 · 147ebd64ca
commit 147ebd64ca
parent ecb20e5ff8
4 changed files with 40 additions and 28 deletions
--- a/nixview/constants.py
+++ b/nixview/constants.py
@@ -10,7 +10,7 @@ settings_recent_files_key = "/".join([organization, application, "recent_files"]
 settings_recent_file_max_count_key =  "/".join([organization, application, "recent_files_max_count"])
 settings_recent_file_max_count = 10
-io_chunksize = 10000000
+max_chunksize = 1000000000
 PACKAGE_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
 ICONS_FOLDER = os.path.join(PACKAGE_ROOT, "icons")
--- a/nixview/ui/plotscreen.py
+++ b/nixview/ui/plotscreen.py
@@ -347,7 +347,7 @@ class PlotScreen(QWidget):
        try:
            self._data_view = DataView(item, self._file_handler)
        except ValueError as e:
-            print(e)
+            print("error in plotscreen.plot", e)
            return
        self._data_view.request_more() # TODO this is just a test, needs to be removed
        print(self._data_view)
--- a/nixview/util/dataview.py
+++ b/nixview/util/dataview.py
@@ -1,5 +1,5 @@
 import numpy as np
-from nixview.constants import io_chunksize as chunksize
+from nixview.constants import max_chunksize as chunksize
 class DataView():
@@ -10,8 +10,9 @@ class DataView():
        self._full_shape = item_descriptor.shape
        self._buffer = None
        self._offset = np.zeros(len(self._full_shape), dtype=int)
        self._fetched_data = np.zeros(len(self._full_shape), dtype=int)
        self._count = None
-        self._max_dim = None
+        self._cut_dim = None
        self.init_buffer()
        self.request_more()
@@ -24,36 +25,46 @@ class DataView():
        sl = tuple([slice(o, o + c) for o, c in zip(self._offset, valid_count)])
        self._buffer[sl] = self._file_handler.request_data(self._item_descriptor, self._offset,
                                                           valid_count)
-        self._offset = tuple([sum(x) for x in zip(self._offset, self._count)])
+        new_ofst = np.zeros_like(self._offset)
        for i, x in enumerate(zip(self._offset, valid_count)):
            if i == self._cut_dim:
                new_ofst[i] = sum(x)
        self._offset = tuple(new_ofst)
        self._fetched_data = tuple([sum(x) for x in zip(self._fetched_data, self._count)])
-        #if data is not None and self._buffer is None:
+    def init_chunking(self):
-        #    self._buffer = data
+        """decides on the chunks size for reading. Heuristic is based on the dimensionality of the data and the "best xdim" if available.
-        #    self._offset = data.shape
+        If data is 2D the best xdim is loaded in chunks (if necessary) while the other is fully loaded. For 3D and more it is the last dimension that is cut. If the number of data points in the first n-1 dimensions exceeds the maximum chunksize (settings) an error will be thrown.
-        #else:
+        """
-        #    from IPython import embed
+        max_element_count = chunksize
-        #    embed()
+        if self._item_descriptor.best_xdim is not None:
            cut_dim = self._item_descriptor.best_xdim 
        else:
            cut_dim = len(self._full_shape) - 1
            if np.prod(self._full_shape[:-1]) > chunksize:
                raise ValueError("Cannot load data in chunks! maxchunksize too small: product of elements in first %i dimensions exceeds max chunksize! (%i > %i)" % (len(self._full_shape) -1, np.prod(self._full_shape[:-1]), chunksize))
        chunk_shape = np.zeros(len(self._full_shape), dtype=int)
        for i, d in enumerate(self._full_shape):
            if i != cut_dim:
                chunk_shape[i] = d
                max_element_count /= d
        chunk_shape[cut_dim] = max_element_count
        self._cut_dim = cut_dim
        self._count = chunk_shape
    def init_buffer(self):
-        buffer_shape = np.zeros(len(self._full_shape), dtype=int)
+        self.init_chunking()
        max_dim_count = chunksize
        max_dim = np.argmax(self._full_shape)
        for i, d in enumerate(self._full_shape):
            if i != max_dim:
                buffer_shape[i] = self._full_shape[i]
                max_dim_count /= self._full_shape[i]
        buffer_shape[max_dim] = max_dim_count
        self._count = buffer_shape
        self._max_dim = max_dim
        try:
            self._buffer = np.empty(self._full_shape)
        except:
-            raise ValueError("Cannot handle so many data points!") #FIXME
+            raise ValueError("Error reserving buffer! Cannot handle so many data points!") #FIXME
-    
+        print("init buffer")
    @property
    def fully_loaded(self):        
-        return self._buffer is not None and self._full_shape == self._offset
+        return np.all(self._buffer is not None and self._fetched_data == self._full_shape)
    def __str__(self) -> str:
        r = self._item_descriptor.name + " " + str(self._item_descriptor.entity_type)
--- a/nixview/util/file_handler.py
+++ b/nixview/util/file_handler.py
@@ -1,4 +1,3 @@
 from nixview.file_utils import suggested_plotter
 import os
 import nixio as nix
 import numpy as np
@@ -103,6 +102,8 @@ class FileHandler(metaclass=Singleton):
        for i, (o, c) in enumerate(zip(offset, count)):
            if o + c > shape[i]:
                valid_count[i] = shape[i] - o
            else:
                valid_count[i] = c
        return valid_count
    def count_is_valid(self, shape, offset, count):