From 147ebd64cac5e18df325a121cca07188826d30b9 Mon Sep 17 00:00:00 2001
From: Jan Grewe <jan.grewe@g-node.org>
Date: Mon, 18 Jan 2021 09:55:47 +0100
Subject: [PATCH] [dataview] fix chunking and loading for n-d data

---
 nixview/constants.py         |  2 +-
 nixview/ui/plotscreen.py     |  2 +-
 nixview/util/dataview.py     | 61 +++++++++++++++++++++---------------
 nixview/util/file_handler.py |  3 +-
 4 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/nixview/constants.py b/nixview/constants.py
index 787e7fa..6fa1d54 100644
--- a/nixview/constants.py
+++ b/nixview/constants.py
@@ -10,7 +10,7 @@ settings_recent_files_key = "/".join([organization, application, "recent_files"]
 settings_recent_file_max_count_key =  "/".join([organization, application, "recent_files_max_count"])
 settings_recent_file_max_count = 10
 
-io_chunksize = 10000000
+max_chunksize = 1000000000
 
 PACKAGE_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
 ICONS_FOLDER = os.path.join(PACKAGE_ROOT, "icons")
diff --git a/nixview/ui/plotscreen.py b/nixview/ui/plotscreen.py
index 212c961..62d88f4 100644
--- a/nixview/ui/plotscreen.py
+++ b/nixview/ui/plotscreen.py
@@ -347,7 +347,7 @@ class PlotScreen(QWidget):
         try:
             self._data_view = DataView(item, self._file_handler)
         except ValueError as e:
-            print(e)
+            print("error in plotscreen.plot", e)
             return
         self._data_view.request_more() # TODO this is just a test, needs to be removed
         print(self._data_view)
diff --git a/nixview/util/dataview.py b/nixview/util/dataview.py
index 2085f84..beec708 100644
--- a/nixview/util/dataview.py
+++ b/nixview/util/dataview.py
@@ -1,5 +1,5 @@
 import numpy as np
-from nixview.constants import io_chunksize as chunksize
+from nixview.constants import max_chunksize as chunksize
 
 class DataView():
 
@@ -10,8 +10,9 @@ class DataView():
         self._full_shape = item_descriptor.shape
         self._buffer = None
         self._offset = np.zeros(len(self._full_shape), dtype=int)
+        self._fetched_data = np.zeros(len(self._full_shape), dtype=int)
         self._count = None
-        self._max_dim = None
+        self._cut_dim = None
         self.init_buffer()
         self.request_more()
 
@@ -24,36 +25,46 @@ class DataView():
         sl = tuple([slice(o, o + c) for o, c in zip(self._offset, valid_count)])
         self._buffer[sl] = self._file_handler.request_data(self._item_descriptor, self._offset,
                                                            valid_count)
-        self._offset = tuple([sum(x) for x in zip(self._offset, self._count)])
-
+        new_ofst = np.zeros_like(self._offset)
+        for i, x in enumerate(zip(self._offset, valid_count)):
+            if i == self._cut_dim:
+                new_ofst[i] = sum(x)
         
-        #if data is not None and self._buffer is None:
-        #    self._buffer = data
-        #    self._offset = data.shape
-        #else:
-        #    from IPython import embed
-        #    embed()
+        self._offset = tuple(new_ofst)
+        self._fetched_data = tuple([sum(x) for x in zip(self._fetched_data, self._count)])
         
-    def init_buffer(self):
-        buffer_shape = np.zeros(len(self._full_shape), dtype=int)
-        max_dim_count = chunksize
-        max_dim = np.argmax(self._full_shape)
+    def init_chunking(self):
+        """Decides on the chunk size for reading. The heuristic is based on the dimensionality of the data and the "best xdim", if available.
+        If the data is 2D, the best xdim is loaded in chunks (if necessary) while the other dimension is fully loaded. For 3D and more, it is the last dimension that is cut. If the number of data points in the first n-1 dimensions exceeds the maximum chunksize (settings), a ValueError is raised.
+        """
+        max_element_count = chunksize
+        if self._item_descriptor.best_xdim is not None:
+            cut_dim = self._item_descriptor.best_xdim 
+        else:
+            cut_dim = len(self._full_shape) - 1
+            if np.prod(self._full_shape[:-1]) > chunksize:
+                raise ValueError("Cannot load data in chunks! maxchunksize too small: product of elements in first %i dimensions exceeds max chunksize! (%i > %i)" % (len(self._full_shape) -1, np.prod(self._full_shape[:-1]), chunksize))
+        chunk_shape = np.zeros(len(self._full_shape), dtype=int)
         for i, d in enumerate(self._full_shape):
-            if i != max_dim:
-                buffer_shape[i] = self._full_shape[i]
-                max_dim_count /= self._full_shape[i]
-        buffer_shape[max_dim] = max_dim_count
-        self._count = buffer_shape
-        self._max_dim = max_dim
+            if i != cut_dim:
+                chunk_shape[i] = d
+                max_element_count /= d
+
+        chunk_shape[cut_dim] = max_element_count
+        self._cut_dim = cut_dim
+        self._count = chunk_shape
+
+    def init_buffer(self):
+        self.init_chunking()
         try:
             self._buffer = np.empty(self._full_shape)
         except:
-            raise ValueError("Cannot handle so many data points!") #FIXME
-    
-    
+            raise ValueError("Error reserving buffer! Cannot handle so many data points!") #FIXME
+        print("init buffer")
+
     @property
-    def fully_loaded(self):
-        return self._buffer is not None and self._full_shape == self._offset
+    def fully_loaded(self):        
+        return np.all(self._buffer is not None and self._fetched_data == self._full_shape)
     
     def __str__(self) -> str:
         r = self._item_descriptor.name + " " + str(self._item_descriptor.entity_type)
diff --git a/nixview/util/file_handler.py b/nixview/util/file_handler.py
index 0cbf260..8e2dfe5 100644
--- a/nixview/util/file_handler.py
+++ b/nixview/util/file_handler.py
@@ -1,4 +1,3 @@
-from nixview.file_utils import suggested_plotter
 import os
 import nixio as nix
 import numpy as np
@@ -103,6 +102,8 @@ class FileHandler(metaclass=Singleton):
         for i, (o, c) in enumerate(zip(offset, count)):
             if o + c > shape[i]:
                 valid_count[i] = shape[i] - o
+            else:
+                valid_count[i] = c
         return valid_count
     
     def count_is_valid(self, shape, offset, count):