improve reading stream for pipeline

robin · robin · commit a2b7641d29a2 · 2019-01-19T19:34:16.000+01:00
diff --git a/docs/pipeline.md b/docs/pipeline.md
@@ -21,7 +21,7 @@ The constructor of the pipeline module will recognize which method is fit for ru
 | n | The `n` is the rowsize of chunks for reading the filterbank as stream. |
 | size | The size parameter is used for deciding the size of the filterbank. |
 
-After deciding which method to run for running the filterbank in a pipeline, it will measure the time it takes to run each method. At the end it will append the results to a txt file.
+After deciding which method to run for running the filterbank in a pipeline, it will measure the time it takes to run each method using `measure_method`. After running all the different methods, the constructor will append the results (a dictionary) to a txt file.
 
 ## 7.2 Read rows
 
@@ -33,20 +33,23 @@ pipeline.Pipeline(<filterbank_file>, as_stream=True)
 
 ## 7.3 Read n rows
 
-The `read_n_rows` method first splits all the filterbank data into chunks of n samples. After splitting the filterbank data in chunks, it will run the different modules of the pipeline for each chunk.
+The `read_n_rows` method first splits all the filterbank data into chunks of n samples. After splitting the filterbank data in chunks, it will run the different modules of the pipeline for each chunk. Any remaining data that does not fill a complete chunk is currently ignored.
+
+The `n` or sample size should be a power of 2 multiplied by the given scale for the downsampling.
 
 ```
 pipeline.Pipeline(<filterbank_file>, n=<size> , as_stream=True)
 ```
 
 ## 7.4 Read static
 
-The `read_static` method reads the entire filterbank at once. If the filterbank file is too large for this method, the alternative is using `read_n_rows`.
+The `read_static` method reads the entire filterbank at once, and applies each method to the entire dataset. If the filterbank file is too large for running it in-memory, the alternative is using `read_n_rows`.
 
 ```
 pipeline.Pipeline(<filterbank_file>)
 ```
 
 ## 7.5 Measure methods
 
-The `measure_methods` is ran for each of the above methods, and calculates the time it takes to run each of the different methods.
+The `measure_methods` is run for each of the above methods, and calculates the time it takes to run each of the different methods. For each method it will create a key using the name of the method, and save the time it took to run the method as a value.
+At the end, it will return a dictionary with all the keys and values.
diff --git a/examples/run_pipeline.py b/examples/run_pipeline.py
@@ -11,15 +11,15 @@
 # init filterbank filename
 fil_name = "./pspm.fil"
 # init filterbank sample size
-sample_size = 1534
+sample_size = 49152
 # init times the pipeline should run
-n_times = 1000
+n_times = 10
 
 # run the filterbank n times
-for i in range(n):
+for i in range(n_times):
     # read static
     Pipeline(filename=fil_name, size=sample_size)
     # read stream, row per row
     Pipeline(filename=fil_name, as_stream=True)
     # read stream, n rows
-    Pipeline(filename=fil_name, as_stream=True, n=10)
+    Pipeline(filename=fil_name, as_stream=True, n=sample_size)
diff --git a/filterbank/filterbank.py b/filterbank/filterbank.py
@@ -25,8 +25,7 @@ def __init__(self, filename, freq_range=None, time_range=None, read_all=False):
         """
         if not os.path.isfile(filename):
             raise FileNotFoundError(filename)
-        # iterator for stream
-        self.stream_iter = 0
+        # header values
         self.data, self.freqs, self.n_chans_selected = None, None, None
         self.filename = filename
         self.header = read_header(filename)
@@ -51,6 +50,8 @@ def __init__(self, filename, freq_range=None, time_range=None, read_all=False):
         self.fil.seek(int(self.ii_start * self.n_bytes * self.n_ifs * self.n_chans), 1)
         # find possible channels
         self.i_0, self.i_1 = self.setup_chans(freq_range)
+        # number of stream iterations
+        self.stream_iter = (self.n_samples * self.n_ifs)
         # read filterbank at once
         if read_all:
             self.read_filterbank()
@@ -84,8 +85,8 @@ def next_row(self):
 
             returns False if EOF
         """
-        if self.stream_iter < (self.n_samples * self.n_ifs):
-            self.stream_iter += 1
+        if self.stream_iter > 0:
+            self.stream_iter -= 1
             # skip bytes
             self.fil.seek(self.n_bytes * self.i_0, 1)
             # read row of data
@@ -104,11 +105,8 @@ def next_n_rows(self, n_rows):
 
             returns False if EOF
         """
-        if self.stream_iter < (self.n_samples * self.n_ifs):
-            # more rows requested than available
-            if self.stream_iter + n_rows >= self.n_samples * self.n_ifs:
-                n_rows = self.n_samples * self.n_ifs - self.stream_iter
-            self.stream_iter += n_rows
+        if self.stream_iter - n_rows > 0:
+            self.stream_iter -= n_rows
             # init array of n rows
             data = np.zeros((n_rows, self.n_chans_selected), dtype=self.dd_type)
             for row in range(n_rows):
diff --git a/fourier/fourier.py b/fourier/fourier.py
@@ -80,6 +80,7 @@ def fft_vectorized(input_data, nfft=None, axis=-1):
             input_data = zeroes
 
     data_length = input_data.shape[0]
+    print(data_length)
 
     if np.log2(data_length) % 1 > 0:
         raise ValueError("Size of input data must be a power of 2")