From b376edb342449e32e85871294eaa46ab6834b3c7 Mon Sep 17 00:00:00 2001
From: Jan Grewe
Date: Mon, 3 Aug 2020 18:25:47 +0200
Subject: [PATCH] [docs] more docs and getting started

---
 docs/configuration.md   |  15 +++--
 docs/getting_started.md | 138 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 146 insertions(+), 7 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 7013f66..411a2fc 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1,9 +1,16 @@
-# Configuration
+# Project configuration
 
 ## Database connection
 
-* Should work with mysql or mariaDB databases
-* if you do not have access to a database server, go back to index.md to read how to install one locally
+* If you do not have access to a remote database server, or if you want to run one locally, see the [index](index.md) for information on setting up a database server.
+
+Suppose you intend to work on a project named `foo`; use the terminal to navigate to the project directory:
+
+```bash
+cd foo
+```
 
 To log into the database you need the `host`, the `database name`, the `user` name, and the `password`. Note that the selected user needs at least read access to the selected database (depending on your use case, write access may be required as well).
@@ -47,4 +54,4 @@ Let's assume you are working remotely, i.e. connecting to a remote database on a
 In case you are working with a local database the ```database.host``` entry is ```"localhost"```.
 If you omit ```database.user``` or ```database.password```, fishbook (or rather DataJoint) will ask for the user credentials every time you run your Python scripts.
 
-**Note:** This configuration file is pure text and can be easily read by anyone. It **must not** be part of a public repository. **Do not add it to your version control system!**
+**IMPORTANT:** This configuration file is pure text and can be easily read by anyone. It **must not** be part of a public repository. **Do not add it to your version control system!**
diff --git a/docs/getting_started.md b/docs/getting_started.md
index ab83747..056e506 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -19,12 +19,144 @@ Fishbook has the following submodules:
 import fishbook as fb
 
 # the following command will find and load all (!) dataset entries in the database
-datasets = fb.Dataset.find()
-print(len(datasets))
+datasets, count = fb.Dataset.find()
+print(count)  # or use print(fb.Dataset.datasetCount())
+```
+
+### Finding what you are looking for
+
+For some classes, fetching all matches and converting them to Python objects (such as instances of the *Dataset* class) may take a while. You could then iterate over the list of datasets in a loop and apply filters in Python. This works, but performs poorly; it is better to let the underlying database apply such restrictions.
+Using the *Dataset* class again, restrictions can be applied like this:
+
+```python
+# to test your restrictions you may want to set the test=True flag
+_, count = fb.Dataset.find(quality="good", min_date="2018-01-01", max_date="2018-12-31", test=True)
+print(count)
+
+# if you indeed want to fetch the results
+datasets, count = fb.Dataset.find(quality="good", min_date="2018-01-01", max_date="2018-12-31")
+print(len(datasets))
+```
+
+You can use the *Dataset* class as an entry point to explore what is stored in the database about a dataset, e.g. which cell(s) were recorded and the subject information.
+
+```python
+d = datasets[0]
+print(d)
+
+cells = d.cells  # get the cells recorded in this dataset
+cell = cells[0]
+print(cell)
+
+s = cell.subject
+print(s)
+
+# in a given subject we may record several cells
+print(len(s.cells))
+```
+
+### Which RePros were run in a given dataset?
+
+In a dataset, several RePros may have been run:
+
+```python
+repros = d.repro_runs()
+for r in repros:
+    print(r)
+```
+
+In this example, this leads to an output like the following: two **Re**search **Pro**tocols were run, first the "BaselineActivity" repro and then the "ReceptiveField" repro.
+
+```bash
+RePro: BaselineActivity id: BaselineActivity_0
+run: 0 on cell: 2018-01-10-ag
+start time: 0.0 duration: 40.0796
+
+RePro: ReceptiveField id: ReceptiveField_0
+run: 0 on cell: 2018-01-10-ag
+start time: 40.0796 duration: 236.42
+```
+
+### Getting information about the stimuli
+
+The latter, i.e. the "ReceptiveField" run, presented a set of stimuli we may be interested in:
+
+```python
+r = d.repro_runs()[-1]
+stimuli = r.stimuli
+print(len(stimuli))  # 90 in this example case
+for stim in stimuli[:10]:  # let's print the first 10
+    print(stim)
+```
+
+An entry in the Stimuli table (see the [ER schema](database_layout.md) for details) is characterized by the stimulus id, the start time or start index (depending on the data source), and the stimulus duration. The stimulus also carries a text field with the stimulus settings.
+
+```python
+stim.settings
+
+>> 'ReceptiveField-1:\n\tReceptiveField-1:\n\t\tModality: electric\n\t\tSamplingRate: 37.59398496240602+- -1.000\n\t\tdur: 0.0+- -1.000\n\t\tampl: 0.0+- -1.000\n\t\tdeltaf: 0.0+- -1.000\n\t\tfreq: 0.0+- -1.000\n\t\tx_pos: 0.0+- -1.000\n\t\ty_pos: 0.0+- -1.000\n\t\tz_pos: 0.0+- -1.000\n\tEOD Rate: 826.6917939137279 Hz\n\tEOD Amplitude: 0.4263797848004206 mV\n\tGlobalEField: 0.0 V\n\tGlobalEFieldAM: 0.0 V\n\tLocalEField: 0.0 V\n\tI: 0.0 V\n\ttime: 294.9933 s\n\tdelay: 0.0 s\n\tintensity: 0.039810717055349734 mV/cm\n\tdur: 1.0 \n\tampl: 0.04 \n\tdeltaf: 20.0 \n\tfreq: 854.7108299738921 \n\tx_pos: 142.5 \n\ty_pos: 2.0 \n\tz_pos: -15.0 \n'
+
+# to convert the settings string to a dictionary via yaml
+import yaml
+settings = yaml.safe_load(stim.settings.replace("\t", ""))
+print(settings)
+```
+
+The output:
+
+```shell
+ {'ReceptiveField-1': None,
+ 'Modality': 'electric',
+ 'SamplingRate': '37.59398496240602+- -1.000',
+ 'dur': 1.0,
+ 'ampl': 0.04,
+ 'deltaf': 20.0,
+ 'freq': 854.7108299738921,
+ 'x_pos': 142.5,
+ 'y_pos': 2.0,
+ 'z_pos': -15.0,
+ 'EOD Rate': '826.6917939137279 Hz',
+ 'EOD Amplitude': '0.4263797848004206 mV',
+ 'GlobalEField': '0.0 V',
+ 'GlobalEFieldAM': '0.0 V',
+ 'LocalEField': '0.0 V',
+ 'I': '0.0 V',
+ 'time': '294.9933 s',
+ 'delay': '0.0 s',
+ 'intensity': '0.039810717055349734 mV/cm'}
+```
+
+### Finding the data
+
+The *Stimulus*, in conjunction with the *Dataset*, provides all information needed to get the data from the recorded traces.
+
+The dataset can be located via the *Dataset* class:
+
+```python
+print(d.data_source)  # the absolute path from which the data was imported
+print(d.data_host)    # the fully qualified host name
+
+print(d.has_nix)      # informs whether the data is stored in a nix file
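+
+# As a quick sanity check one could, for instance, compare the data host with
+# the name of the local machine; this sketch assumes that d.data_host holds a
+# fully qualified host name comparable to what socket.getfqdn() returns.
+import socket
+print(d.data_host == socket.getfqdn())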
+```
+
+Let's assume you have access to the dataset and it uses a [NIX](https://g-node.com/nix) file:
+
+```python
+import os
+import glob
+import nixio as nix
+
+path = d.data_source
+assert os.path.exists(path)
+if d.has_nix:
+    nix_file = glob.glob(path + os.sep + "*nix")[0]  # there should only be one nix file
+    mtag_id = stim.multi_tag_id
+    position_index = stim.index
+
+    nf = nix.File.open(nix_file, nix.FileMode.ReadOnly)
+    block = nf.blocks[0]  # there is only a single block in relacs-written files
+    mt = block.multi_tags[mtag_id]
+    data = mt.retrieve_data(position_index, "V-1")[:]  # we are interested in the membrane voltage
+    nf.close()
+    print(data.shape)
 ```
-The Dataset is an entrypoint to the database
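+
+Once the voltage trace has been loaded you will probably want to look at it. Below is a minimal sketch, assuming matplotlib is installed; it plots the trace against the sample index, since the sampling rate is not read from the file in this snippet:
+
+```python
+import numpy as np
+import matplotlib.pyplot as plt
+
+# plot the retrieved membrane voltage against the sample index; for a proper
+# time axis the sampling interval would have to be read from the nix file
+# (e.g. from the sampled dimension of the referenced data array) before closing it
+plt.plot(np.arange(len(data)), data)
+plt.xlabel("sample index")
+plt.ylabel("membrane voltage (V-1)")
+plt.show()
+```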