diff --git a/docs/source/users_guide/processing_raw_file.rst b/docs/source/users_guide/processing_raw_file.rst
index dacdcac..9227170 100644
--- a/docs/source/users_guide/processing_raw_file.rst
+++ b/docs/source/users_guide/processing_raw_file.rst
@@ -13,7 +13,7 @@ Processing a raw .sp2b file is as easy as:
 
 .. code-block:: python
 
-    waveforms = pysp2.io.read_sp2b(file_name)
+    waveforms = pysp2.io.read_sp2(file_name)
 
 Where file_name is the name of the file you want to read. The output is an
 xarray Dataset containing the waveforms in each channel for each particle. The
diff --git a/pysp2/util/__init__.py b/pysp2/util/__init__.py
index af8f912..68bba37 100644
--- a/pysp2/util/__init__.py
+++ b/pysp2/util/__init__.py
@@ -17,4 +17,4 @@ from .DMTGlobals import DMTGlobals
 from .particle_properties import calc_diams_masses, process_psds
 from .deadtime import deadtime
-from .leo_fit import beam_shape
\ No newline at end of file
+from .leo_fit import beam_shape, leo_fit
diff --git a/pysp2/util/leo_fit.py b/pysp2/util/leo_fit.py
index fee4396..c636733 100644
--- a/pysp2/util/leo_fit.py
+++ b/pysp2/util/leo_fit.py
@@ -1,16 +1,18 @@
 import numpy as np
-from .peak_fit import _gaus
 from scipy.optimize import curve_fit
+import xarray as xr
+from .peak_fit import _gaus, _do_fit_records
 
 
-def beam_shape(my_binary, beam_position_from='peak maximum', Globals=None):
+def beam_shape(my_binary, beam_position_from='split point', Globals=None,
+               moving_average_window=5, max_amplitude_fraction=0.033):
     """
     Calculates the beam shape needed to determine the laser intensity profile
-    used for leading-edge-only fits. The beam position is first determined
-    from the position of the peak. The beam profile is then calculated by
-    positioning all the peaks to that position. The beam shape is then the mean
-    of all the normalized profiles arround that position.
+    used for leading-edge-only fits. The beam position is by default determined
+    from the position of the peak relative to the split point. The beam profile
+    is then calculated by positioning all the peaks to that position. The beam
+    shape is then the mean of all the normalized profiles around that position.
 
     Parameters
     ----------
@@ -23,98 +25,380 @@ def beam_shape(my_binary, beam_position_from='peak maximum', Globals=None):
         signal limits.
     beam_position_from : str
+        'split point' = construct the beam profile around the split position.
+        The split position is taken from the split detector. Use this if a
+        split detector is installed.
-        'peak maximum' = construct the beam profile arround the maximum peak
-        poistion. The maximum peak position is determied from the peak-height
-        weighted average peak position.
-        'split detector' = construct the beam profile around the split position.
-        The split position is taken from the split detector. Not working yet.
+        'peak maximum' = construct the beam profile around the maximum peak
+        position. The maximum peak position is determined from the mean peak
+        position.
+    moving_average_window : int
+        Number of beam profiles to use to calculate the shape of the laser
+        beam. This is done using scattering-only particles.
+    max_amplitude_fraction : float
+        How much of the signal amplitude to use for the leading edge. The
+        amplitude is normalized to be between 0 and 1, so a
+        max_amplitude_fraction of 0.03 means that 3% of the signal is used.
 
     Returns
     -------
-    coeff : numpy array with gaussian fit [amplitude, peakpos, width, base]
-            of the beam shape profile.
-    beam_profile : numpy array with the beam profile calculated from the mean
-                   of all the profiles. The array has as many data points as
-                   the input data.
-
+    my_binary : xarray Dataset
+        Dataset with additional statistics and information about the laser
+        beam profile, the split point positions relative to the beam profile,
+        etc. These are needed for the actual leo_fit() function. All
+        variables that are added to the xarray Dataset begin with "leo_".
+        These leo_ variables are available for all particles, which makes
+        the LEO fit possible for incandescent particles as well.
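+
+    Examples
+    --------
+    A minimal sketch of the intended workflow, assuming my_binary already
+    contains Gaussian-fitted waveforms (e.g. from pysp2.util.gaussian_fit):
+
+    >>> Globals = pysp2.util.DMTGlobals()
+    >>> my_binary = pysp2.util.beam_shape(
+    ...     my_binary, beam_position_from='split point', Globals=Globals)
+    >>> my_binary = pysp2.util.leo_fit(my_binary, Globals=Globals)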
+    """
-    num_base_pts_2_avg = 5
-    bins=my_binary['columns']
+    num_base_pts_2_avg = 10
 
-    #boolean array for ok particles in the high gain channel
-    scatter_high_gain_accepted = np.logical_and.reduce((
+    scatter_ch0_accepted = np.logical_and.reduce((
         my_binary['PkFWHM_ch0'].values > Globals.ScatMinWidth,
         my_binary['PkFWHM_ch0'].values < Globals.ScatMaxWidth,
         my_binary['PkHt_ch0'].values > Globals.ScatMinPeakHt1,
         my_binary['PkHt_ch0'].values < Globals.ScatMaxPeakHt1,
         my_binary['FtPos_ch0'].values < Globals.ScatMaxPeakPos,
         my_binary['FtPos_ch0'].values > Globals.ScatMinPeakPos))
+
+    scatter_ch4_accepted = np.logical_and.reduce((
+        my_binary['PkFWHM_ch4'].values > Globals.ScatMinWidth,
+        my_binary['PkFWHM_ch4'].values < Globals.ScatMaxWidth,
+        my_binary['PkHt_ch4'].values > Globals.ScatMinPeakHt2,
+        my_binary['PkHt_ch4'].values < Globals.ScatMaxPeakHt2,
+        my_binary['FtPos_ch4'].values < Globals.ScatMaxPeakPos,
+        my_binary['FtPos_ch4'].values > Globals.ScatMinPeakPos))
+
+    #no incandescence signal = True
+    no_incand_trigged = np.logical_and(
+        my_binary['PkHt_ch1'].values < Globals.IncanMinPeakHt1,
+        my_binary['PkHt_ch5'].values < Globals.IncanMinPeakHt2)
 
-    #boolean array that is True for particles that have not been triggered by
-    #the high gain incandesence channel
-    no_incand_trigged = my_binary['PkHt_ch1'].values < Globals.IncanMinPeakHt1
-
-    #find good events that only scatter light
-    only_scattering_high_gain = np.logical_and.reduce((scatter_high_gain_accepted,
+    #Particles that only scatter light
+    only_scattering_ch0 = np.logical_and.reduce((scatter_ch0_accepted,
                                                  no_incand_trigged))
-
+    only_scattering_ch4 = np.logical_and.reduce((scatter_ch4_accepted,
+                                                 no_incand_trigged))
+
+    iloc_ch0 = np.argwhere(only_scattering_ch0).flatten()
+    iloc_ch4 = np.argwhere(only_scattering_ch4).flatten()
+
     print('High gain scattering particles for beam analysis :: ',
-          np.sum(only_scattering_high_gain))
+          np.sum(only_scattering_ch0))
+    print('Low gain scattering particles for beam analysis :: ',
+          np.sum(only_scattering_ch4))
 
     #make an xarray of the purely scattering particles
-    my_high_gain_scatterers = my_binary.sel(index = only_scattering_high_gain,
-                                            event_index = only_scattering_high_gain)
+    my_ch0_scatterers = my_binary.sel(event_index=only_scattering_ch0)
+    my_ch4_scatterers = my_binary.sel(event_index=only_scattering_ch4)
 
     #numpy array for the normalized beam profiles
-    my_high_gain_profiles = np.zeros((my_high_gain_scatterers.dims['index'],
-                                      my_high_gain_scatterers.dims['columns'])) \
+    my_ch0_profiles = np.zeros((my_ch0_scatterers.sizes['event_index'],  #was 'index'
+                                my_ch0_scatterers.sizes['columns'])) \
                                 * np.nan
-
-    #weighted mean of beam peak position. Weight is scattering amplitude.
-    high_gain_peak_pos = int(
-        np.sum(np.multiply(my_high_gain_scatterers['PkPos_ch0'].values,
-                           my_high_gain_scatterers['PkHt_ch0'].values))/ \
-        np.sum(my_high_gain_scatterers['PkHt_ch0'].values))
-
+    my_ch4_profiles = np.zeros((my_ch4_scatterers.sizes['event_index'],  #was 'index'
+                                my_ch4_scatterers.sizes['columns'])) \
+                                * np.nan
+
+    #mean positions of the peak maximum and of the split point
+    mean_ch0_max_peak_pos = int(np.nanmean(my_ch0_scatterers['PkPos_ch0'].values))
+    mean_ch0_split_pos_float = np.nanmean(my_ch0_scatterers['PkSplitPos_ch3'].values)
+    mean_ch0_split_pos = np.round(mean_ch0_split_pos_float).astype(np.int32)
+    mean_ch4_max_peak_pos = int(np.nanmean(my_ch4_scatterers['PkPos_ch4'].values))
+    mean_ch4_split_pos_float = np.nanmean(my_ch4_scatterers['PkSplitPos_ch7'].values)
+    mean_ch4_split_pos = np.round(mean_ch4_split_pos_float).astype(np.int32)
+
+    #cross to center: distance from the split point to the fitted peak position
+    ch0_c2c = my_ch0_scatterers['FtPos_ch0'].values - my_ch0_scatterers['PkSplitPos_ch3'].values
+    ch4_c2c = my_ch4_scatterers['FtPos_ch4'].values - my_ch4_scatterers['PkSplitPos_ch7'].values
+
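+    #Example of the alignment below, with made-up numbers: if
+    #mean_ch0_split_pos is 50 and an event crosses the split detector at
+    #sample 47, peak_difference_ch0 = 3 and the normalized profile is
+    #shifted three columns to the right, so all events line up on the same
+    #beam-relative position before averaging.
+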
     #loop through all particle events
-    for i in my_high_gain_scatterers['event_index']:
-        data = my_high_gain_scatterers['Data_ch0'].sel(event_index=i).values
+    for i in my_ch0_scatterers['event_index']:
+        data_ch0 = my_ch0_scatterers['Data_ch0'].sel(event_index=i).values
+        #base level
+        base_ch0 = np.mean(data_ch0[0:num_base_pts_2_avg])
+        #peak height
+        peak_height_ch0 = data_ch0.max() - base_ch0
+        #max peak position
+        peak_pos_ch0 = data_ch0.argmax()
+        #split position
+        split_pos_ch3 = my_ch0_scatterers['PkSplitPos_ch3'].sel(event_index=i).values
+        if split_pos_ch3 > peak_pos_ch0:
+            continue
+        #normalize the profile to range [~0, 1]
+        profile_ch0 = (data_ch0 - base_ch0) / peak_height_ch0
+        #distance to the mean beam peak position
+        if beam_position_from == 'peak maximum':
+            peak_difference_ch0 = mean_ch0_max_peak_pos - peak_pos_ch0
+        elif beam_position_from == 'split point':
+            peak_difference_ch0 = mean_ch0_split_pos - split_pos_ch3
+        #insert so that the peak is at the right position (accounts for
+        #particles travelling at different speeds)
+        if peak_difference_ch0 > 0:
+            my_ch0_profiles[i, peak_difference_ch0:] = profile_ch0[:len(data_ch0) -
+                                                                   peak_difference_ch0]
+        elif peak_difference_ch0 < 0:
+            my_ch0_profiles[i, :len(data_ch0)+peak_difference_ch0] = profile_ch0[-peak_difference_ch0:]
+        else:
+            my_ch0_profiles[i, :] = profile_ch0
+
+    for i in my_ch4_scatterers['event_index']:
+        data_ch4 = my_ch4_scatterers['Data_ch4'].sel(event_index=i).values
         #base level
-        base = np.mean(data[0:num_base_pts_2_avg])
+        base_ch4 = np.mean(data_ch4[0:num_base_pts_2_avg])
         #peak height
-        peak_height = data.max()-base
-        #peak position
-        peak_pos = data.argmax()
-        #normalize the profile to range [0,1]
-        profile = (data - base) / peak_height
+        peak_height_ch4 = data_ch4.max() - base_ch4
+        #max peak position
+        peak_pos_ch4 = data_ch4.argmax()
+        #split position
+        split_pos_ch7 = my_ch4_scatterers['PkSplitPos_ch7'].sel(event_index=i).values
+        if split_pos_ch7 > peak_pos_ch4:
+            continue
+        #normalize the profile to range [~0, 1]
+        profile_ch4 = (data_ch4 - base_ch4) / peak_height_ch4
         #distance to the mean beam peak position
-        peak_difference = high_gain_peak_pos - peak_pos
+        if beam_position_from == 'peak maximum':
+            peak_difference_ch4 = mean_ch4_max_peak_pos - peak_pos_ch4
+        elif beam_position_from == 'split point':
+            peak_difference_ch4 = mean_ch4_split_pos - split_pos_ch7
         #insert so that the peak is at the right position (accounts for
         #particles travelling at different speeds)
-        if peak_difference > 0:
-            my_high_gain_profiles[i, peak_difference:] = profile[:len(data) -
-                                                                 peak_difference]
-        elif peak_difference < 0:
-            my_high_gain_profiles[i, :len(data)+peak_difference] = profile[-peak_difference:]
+        if peak_difference_ch4 > 0:
+            my_ch4_profiles[i, peak_difference_ch4:] = profile_ch4[:len(data_ch4) -
+                                                                   peak_difference_ch4]
+        elif peak_difference_ch4 < 0:
+            my_ch4_profiles[i, :len(data_ch4)+peak_difference_ch4] = profile_ch4[-peak_difference_ch4:]
         else:
-            my_high_gain_profiles[i, :] = profile
-
-    #get the beam profile
-    beam_profile = np.nanmean(my_high_gain_profiles, axis=0)
-    #find values that are lower than 5% of the max value.
-    low_values = np.argwhere(beam_profile < 0.05)
-    #fit the gaussian curve to the beginning of the profile only. The tail
-    #can deviate from zero substantially and is not of interest.
-    fit_to = low_values[low_values > high_gain_peak_pos].min()
-
-    #initial guess
-    p0 = np.array([beam_profile.max() - beam_profile.min(),
-                   np.argmax(beam_profile), 20.,
-                   np.nanmin(beam_profile)]).astype(float)
-    #fit gaussian curve
-    #coeff[amplitude, peakpos, width , baseline]
-    coeff, var_matrix = curve_fit(_gaus, bins[:fit_to], beam_profile[:fit_to],
-                                  p0=p0, method='lm', maxfev=40, ftol=1e-3)
-
-    return coeff, beam_profile
\ No newline at end of file
+            my_ch4_profiles[i, :] = profile_ch4
+
+
+    #Moving average of the beam profile: find the distance between the split
+    #point and the position in the laser beam where particles can start to
+    #evaporate.
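+    #np.lib.stride_tricks.sliding_window_view(a, w, axis=0) stacks the window
+    #on a new last axis: for an (n_events, n_columns) profile array and
+    #w = moving_average_window = 5, the view below has shape
+    #(n_events - 4, n_columns, 5), so the nanmedian over axis=2 gives one
+    #smoothed profile per window.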
+    #moving average of the beam shape with a window of moving_average_window
+    moving_ch0_profile_window = np.lib.stride_tricks.sliding_window_view(my_ch0_profiles,
+                                    moving_average_window, axis=0)
+    moving_avg_ch0_profiles_ = np.nanmedian(moving_ch0_profile_window, axis=2)
+    moving_ch4_profile_window = np.lib.stride_tricks.sliding_window_view(my_ch4_profiles,
+                                    moving_average_window, axis=0)
+    moving_avg_ch4_profiles_ = np.nanmedian(moving_ch4_profile_window, axis=2)
+
+    moving_avg_ch0_profiles = np.zeros_like(moving_avg_ch0_profiles_) * np.nan
+    moving_avg_ch0_split_to_leo_pos = np.zeros(moving_avg_ch0_profiles_.shape[0]) * np.nan
+    moving_avg_ch0_max_leo_pos = np.zeros(moving_avg_ch0_profiles_.shape[0]) * np.nan
+
+    moving_avg_ch4_profiles = np.zeros_like(moving_avg_ch4_profiles_) * np.nan
+    moving_avg_ch4_split_to_leo_pos = np.zeros(moving_avg_ch4_profiles_.shape[0]) * np.nan
+    moving_avg_ch4_max_leo_pos = np.zeros(moving_avg_ch4_profiles_.shape[0]) * np.nan
+
+    moving_avg_ch0_max_leo_amplitude_factor = np.zeros(moving_avg_ch0_profiles_.shape[0]) * np.nan
+    moving_avg_ch4_max_leo_amplitude_factor = np.zeros(moving_avg_ch4_profiles_.shape[0]) * np.nan
+
+    for i in range(moving_avg_ch0_profiles_.shape[0]):
+        i_profile = moving_avg_ch0_profiles_[i, :]
+        i_max = np.nanargmax(i_profile)
+        i_range = i_profile[i_max] - np.nanmin(i_profile[:i_max])
+        moving_avg_ch0_profiles[i, :] = (i_profile - np.nanmin(i_profile[:i_max])) / i_range
+        #TODO: interpolate to get the exact (fractional) position of the
+        #max_amplitude_fraction crossing, and skip crossings that fall inside
+        #the region where the baseline is calculated
+        above_max_leo_pos = np.ndarray.flatten(
+            np.argwhere(moving_avg_ch0_profiles[i, :] >= max_amplitude_fraction))
+        moving_avg_ch0_max_leo_pos[i] = above_max_leo_pos[above_max_leo_pos > num_base_pts_2_avg].min() - 1
+
+        moving_avg_ch0_split_to_leo_pos[i] = moving_avg_ch0_max_leo_pos[i] - mean_ch0_split_pos
+        moving_avg_ch0_max_leo_amplitude_factor[i] = \
+            1. / moving_avg_ch0_profiles[i, np.round(moving_avg_ch0_max_leo_pos[i]).astype(int)]
+
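+    #The amplitude factor inverts the smoothed profile at the LEO end
+    #position: for example, if the profile has climbed to about 0.033 of its
+    #maximum there, the factor is roughly 30, so (signal - baseline) at that
+    #point times the factor recovers an estimate of the full peak height.
+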
+    for i in range(moving_avg_ch4_profiles_.shape[0]):
+        i_profile = moving_avg_ch4_profiles_[i, :]
+        i_max = np.nanargmax(i_profile)
+        i_range = i_profile[i_max] - np.nanmin(i_profile[:i_max])
+        moving_avg_ch4_profiles[i, :] = (i_profile - np.nanmin(i_profile[:i_max])) / i_range
+        #TODO: interpolate to get the exact (fractional) position of the
+        #max_amplitude_fraction crossing, and skip crossings that fall inside
+        #the region where the baseline is calculated
+        above_max_leo_pos = np.ndarray.flatten(
+            np.argwhere(moving_avg_ch4_profiles[i, :] >= max_amplitude_fraction))
+        moving_avg_ch4_max_leo_pos[i] = above_max_leo_pos[above_max_leo_pos > num_base_pts_2_avg].min() - 1
+
+        moving_avg_ch4_split_to_leo_pos[i] = moving_avg_ch4_max_leo_pos[i] - mean_ch4_split_pos
+        moving_avg_ch4_max_leo_amplitude_factor[i] = \
+            1. / moving_avg_ch4_profiles[i, np.round(moving_avg_ch4_max_leo_pos[i]).astype(int)]
+
+    #cleaning up
+    moving_avg_ch0_max_leo_pos = np.where(moving_avg_ch0_max_leo_pos < num_base_pts_2_avg,
+                                          np.nan, moving_avg_ch0_max_leo_pos)
+    moving_avg_ch0_split_to_leo_pos = np.where(moving_avg_ch0_split_to_leo_pos < -30.,
+                                               np.nan, moving_avg_ch0_split_to_leo_pos)
+    moving_avg_ch0_max_leo_amplitude_factor = np.where(moving_avg_ch0_max_leo_pos < num_base_pts_2_avg,
+                                                       np.nan, moving_avg_ch0_max_leo_amplitude_factor)
+
+    moving_avg_ch4_max_leo_pos = np.where(moving_avg_ch4_max_leo_pos < num_base_pts_2_avg,
+                                          np.nan, moving_avg_ch4_max_leo_pos)
+    moving_avg_ch4_split_to_leo_pos = np.where(moving_avg_ch4_split_to_leo_pos < -30.,
+                                               np.nan, moving_avg_ch4_split_to_leo_pos)
+    moving_avg_ch4_max_leo_amplitude_factor = np.where(moving_avg_ch4_max_leo_pos < num_base_pts_2_avg,
+                                                       np.nan, moving_avg_ch4_max_leo_amplitude_factor)
+
+    #moving average of beam width
+    moving_ch0_beam_width = np.lib.stride_tricks.sliding_window_view(my_ch0_scatterers['PkFWHM_ch0'].values,
+                                moving_average_window, axis=0)
+    moving_ch4_beam_width = np.lib.stride_tricks.sliding_window_view(my_ch4_scatterers['PkFWHM_ch4'].values,
+                                moving_average_window, axis=0)
+    moving_median_ch0_beam_width = np.nanmedian(moving_ch0_beam_width, axis=1)
+    moving_median_ch4_beam_width = np.nanmedian(moving_ch4_beam_width, axis=1)
+
+    #moving cross to centre (c2c)
+    moving_ch0_c2c = np.lib.stride_tricks.sliding_window_view(ch0_c2c,
+                         moving_average_window, axis=0)
+    moving_ch4_c2c = np.lib.stride_tricks.sliding_window_view(ch4_c2c,
+                         moving_average_window, axis=0)
+
+    moving_median_ch0_c2c = np.nanmedian(moving_ch0_c2c, axis=1)
+    moving_median_ch4_c2c = np.nanmedian(moving_ch4_c2c, axis=1)
+
+    leo_AmpFactor_ch0 = np.zeros(scatter_ch0_accepted.shape) * np.nan
+    leo_AmpFactor_ch0[iloc_ch0[:-moving_average_window+1]] = moving_avg_ch0_max_leo_amplitude_factor
+    leo_AmpFactor_ch4 = np.zeros(scatter_ch4_accepted.shape) * np.nan
+    leo_AmpFactor_ch4[iloc_ch4[:-moving_average_window+1]] = moving_avg_ch4_max_leo_amplitude_factor
+
+    leo_PkFWHM_ch0 = np.zeros(scatter_ch0_accepted.shape) * np.nan
+    leo_PkFWHM_ch0[iloc_ch0[:-moving_average_window+1]] = moving_median_ch0_beam_width
+    leo_PkFWHM_ch4 = np.zeros(scatter_ch4_accepted.shape) * np.nan
+    leo_PkFWHM_ch4[iloc_ch4[:-moving_average_window+1]] = moving_median_ch4_beam_width
+
+    leo_c2c_ch0 = np.zeros(scatter_ch0_accepted.shape) * np.nan
+    leo_c2c_ch0[iloc_ch0[:-moving_average_window+1]] = moving_median_ch0_c2c
+    leo_c2c_ch4 = np.zeros(scatter_ch4_accepted.shape) * np.nan
+    leo_c2c_ch4[iloc_ch4[:-moving_average_window+1]] = moving_median_ch4_c2c
+
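+    #Map the per-window medians back onto the full event_index axis via the
+    #iloc_* indices of the scattering-only particles; the remaining NaNs are
+    #filled below by nearest-neighbour interpolation, so that every particle,
+    #including the rBC-containing ones, gets beam statistics.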
+    leo_split_to_leo_ch0 = np.zeros(scatter_ch0_accepted.shape) * np.nan
+    leo_split_to_leo_ch0[iloc_ch0[:-moving_average_window+1]] = moving_avg_ch0_split_to_leo_pos
+    leo_split_to_leo_ch4 = np.zeros(scatter_ch4_accepted.shape) * np.nan
+    leo_split_to_leo_ch4[iloc_ch4[:-moving_average_window+1]] = moving_avg_ch4_split_to_leo_pos
+
+    output_ds = my_binary.copy()
+
+    output_ds['leo_AmpFactor_ch0'] = (('event_index'), leo_AmpFactor_ch0)
+    output_ds['leo_AmpFactor_ch0'] = output_ds['leo_AmpFactor_ch0'].interpolate_na(dim="event_index",
+                                         method="nearest", fill_value="extrapolate")
+    output_ds['leo_AmpFactor_ch4'] = (('event_index'), leo_AmpFactor_ch4)
+    output_ds['leo_AmpFactor_ch4'] = output_ds['leo_AmpFactor_ch4'].interpolate_na(dim="event_index",
+                                         method="nearest", fill_value="extrapolate")
+    output_ds['leo_PkFWHM_ch0'] = (('event_index'), leo_PkFWHM_ch0)
+    output_ds['leo_PkFWHM_ch4'] = (('event_index'), leo_PkFWHM_ch4)
+
+    #cross-to-centre distance (split point to laser intensity maximum).
+    #This comes from the scattering-only particles.
+    output_ds['leo_PkPos_ch0'] = (('event_index'), leo_c2c_ch0)
+    output_ds['leo_PkPos_ch4'] = (('event_index'), leo_c2c_ch4)
+    #interpolate to all particles (including, most importantly, rBC-containing particles)
+    output_ds['leo_PkPos_ch0'] = output_ds['leo_PkPos_ch0'].interpolate_na(dim="event_index",
+                                     method="nearest", fill_value="extrapolate")
+    output_ds['leo_PkPos_ch4'] = output_ds['leo_PkPos_ch4'].interpolate_na(dim="event_index",
+                                     method="nearest", fill_value="extrapolate")
+    #add the split position to the cross-to-centre distance to get the (estimated)
+    #location of the peak maximum. This is needed for the LEO fit.
+    output_ds['leo_PkPos_ch0'] = output_ds['leo_PkPos_ch0'] + my_binary['PkSplitPos_ch3']
+    output_ds['leo_PkPos_ch4'] = output_ds['leo_PkPos_ch4'] + my_binary['PkSplitPos_ch7']
+
+    #first add the split-point-to-LEO-end (t_alpha) distances
+    output_ds['leo_EndPos_ch0'] = (('event_index'), leo_split_to_leo_ch0)
+    output_ds['leo_EndPos_ch4'] = (('event_index'), leo_split_to_leo_ch4)
+    #interpolate to all particles
+    output_ds['leo_EndPos_ch0'] = output_ds['leo_EndPos_ch0'].interpolate_na(dim="event_index",
+                                      method="nearest", fill_value="extrapolate")
+    output_ds['leo_EndPos_ch4'] = output_ds['leo_EndPos_ch4'].interpolate_na(dim="event_index",
+                                      method="nearest", fill_value="extrapolate")
+
+    #calculate the position at which the LEO fits should end, based on the
+    #split position (of all particles)
+    output_ds['leo_EndPos_ch0'] = output_ds['leo_EndPos_ch0'] + output_ds['PkSplitPos_ch3']
+    output_ds['leo_EndPos_ch4'] = output_ds['leo_EndPos_ch4'] + output_ds['PkSplitPos_ch7']
+    #cleaning up
+    output_ds['leo_EndPos_ch0'].values = np.where(output_ds['leo_EndPos_ch0'].values < num_base_pts_2_avg,
+                                                  np.nan, output_ds['leo_EndPos_ch0'].values)
+    output_ds['leo_EndPos_ch4'].values = np.where(output_ds['leo_EndPos_ch4'].values < num_base_pts_2_avg,
+                                                  np.nan, output_ds['leo_EndPos_ch4'].values)
+
+    output_ds['leo_PkFWHM_ch0'] = output_ds['leo_PkFWHM_ch0'].interpolate_na(dim="event_index",
+                                      method="nearest", fill_value="extrapolate")
+    output_ds['leo_PkFWHM_ch4'] = output_ds['leo_PkFWHM_ch4'].interpolate_na(dim="event_index",
+                                      method="nearest", fill_value="extrapolate")
+    output_ds['leo_PkPos_ch0'] = output_ds['leo_PkPos_ch0'].interpolate_na(dim="event_index",
+                                     method="nearest", fill_value="extrapolate")
+    output_ds['leo_PkPos_ch4'] = output_ds['leo_PkPos_ch4'].interpolate_na(dim="event_index",
+                                     method="nearest", fill_value="extrapolate")
+
+    return output_ds
+
+
+def leo_fit(my_binary, Globals=None):
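+    """
+    Performs the leading-edge-only (LEO) fit for the scattering channels
+    (ch0 and ch4) using the beam statistics ("leo_" variables) added by
+    beam_shape(). For each event, a Gaussian with fixed peak position, width
+    and baseline is fitted to the last few leading-edge samples, and the
+    peak height is estimated from the signal at the LEO end position scaled
+    by the amplitude factor.
+
+    Parameters
+    ----------
+    my_binary : xarray Dataset
+        Dataset with Gaussian fits and the beam_shape() statistics.
+    Globals : DMTGlobals structure or None
+        DMTGlobals structure containing calibration coefficients.
+
+    Returns
+    -------
+    output_ds : xarray Dataset
+        Input Dataset with the LEO-estimated peak heights (leo_FtAmp_ch0/ch4)
+        and baselines (leo_Base_ch0/ch4) added.
+    """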
+    bins = my_binary['columns'].astype('float').values
+    #number of points at the beginning to use for the baseline
+    num_base_pts_2_avg = 10
+
+    #get the information about the Gaussian fits
+    pos_ch0 = my_binary['leo_PkPos_ch0'].values
+    pos_ch4 = my_binary['leo_PkPos_ch4'].values
+
+    width_ch0 = my_binary['leo_PkFWHM_ch0'].values / 2.35482  #2*sqrt(2*log(2))
+    width_ch4 = my_binary['leo_PkFWHM_ch4'].values / 2.35482  #2*sqrt(2*log(2))
+    data_ch0 = my_binary['Data_ch0'].values
+    data_ch4 = my_binary['Data_ch4'].values
+
+    #baseline: the minimum of the first num_base_pts_2_avg points
+    data_ch0_sorted = np.sort(data_ch0[:, 0:num_base_pts_2_avg], axis=1)
+    data_ch4_sorted = np.sort(data_ch4[:, 0:num_base_pts_2_avg], axis=1)
+    leo_base_ch0 = np.min(data_ch0_sorted[:, 0:int(num_base_pts_2_avg)], axis=1)
+    leo_base_ch4 = np.min(data_ch4_sorted[:, 0:int(num_base_pts_2_avg)], axis=1)
+
+    leo_fit_max_pos_ch0 = my_binary['leo_EndPos_ch0'].astype(int).values
+    leo_fit_max_pos_ch4 = my_binary['leo_EndPos_ch4'].astype(int).values
+    leo_AmpFactor_ch0 = my_binary['leo_AmpFactor_ch0'].values
+    leo_AmpFactor_ch4 = my_binary['leo_AmpFactor_ch4'].values
+    leo_PkHt_ch0 = np.zeros_like(my_binary['PkHt_ch0'].values) * np.nan
+    leo_PkHt_ch4 = np.zeros_like(my_binary['PkHt_ch4'].values) * np.nan
+
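+    #In the loops below only the Gaussian amplitude is free: peak position,
+    #width and baseline are frozen to the beam statistics, and the fit uses
+    #just the last three leading-edge samples (ending at leo_EndPos). The
+    #fitted amplitude is then superseded by the simpler estimate
+    #(signal at leo_EndPos - baseline) * leo_AmpFactor.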
+    #High gain
+    for i in range(my_binary.sizes['event_index']):
+        bins_ = bins[leo_fit_max_pos_ch0[i]-3:leo_fit_max_pos_ch0[i]]
+        if len(bins_) < 2:
+            leo_PkHt_ch0[i] = np.nan
+            continue
+        #signals on the leading edge
+        data_ch0_ = data_ch0[i, leo_fit_max_pos_ch0[i]-3:leo_fit_max_pos_ch0[i]]
+        leo_coeff, var_matrix = curve_fit(
+            lambda x, a: _gaus(x, a, pos_ch0[i], width_ch0[i], leo_base_ch0[i]),
+            bins_, data_ch0_, p0=[data_ch0[i, :].max()], maxfev=100,
+            ftol=1e-5, method='lm')
+        leo_PkHt_ch0[i] = leo_coeff[0]
+        #the amplitude-factor estimate overrides the fitted amplitude
+        leo_PkHt_ch0[i] = (data_ch0[i, leo_fit_max_pos_ch0[i]] - leo_base_ch0[i]) * leo_AmpFactor_ch0[i]
+
+    #Low gain
+    for i in range(my_binary.sizes['event_index']):
+        bins_ = bins[leo_fit_max_pos_ch4[i]-3:leo_fit_max_pos_ch4[i]]
+        if len(bins_) < 2:
+            leo_PkHt_ch4[i] = np.nan
+            continue
+        #signals on the leading edge
+        data_ch4_ = data_ch4[i, leo_fit_max_pos_ch4[i]-3:leo_fit_max_pos_ch4[i]]
+        leo_coeff, var_matrix = curve_fit(
+            lambda x, a: _gaus(x, a, pos_ch4[i], width_ch4[i], leo_base_ch4[i]),
+            bins_, data_ch4_, p0=[data_ch4[i, :].max()], maxfev=100,
+            ftol=1e-5, method='lm')
+        leo_PkHt_ch4[i] = leo_coeff[0]
+        #the amplitude-factor estimate overrides the fitted amplitude
+        leo_PkHt_ch4[i] = (data_ch4[i, leo_fit_max_pos_ch4[i]] - leo_base_ch4[i]) * leo_AmpFactor_ch4[i]
+
+    #Only positive and finite values
+    leo_PkHt_ch0 = np.where(leo_PkHt_ch0 > 0, leo_PkHt_ch0, np.nan)
+    leo_PkHt_ch4 = np.where(leo_PkHt_ch4 > 0, leo_PkHt_ch4, np.nan)
+    leo_PkHt_ch0 = np.where(leo_PkHt_ch0 < np.inf, leo_PkHt_ch0, np.nan)
+    leo_PkHt_ch4 = np.where(leo_PkHt_ch4 < np.inf, leo_PkHt_ch4, np.nan)
+
+    output_ds = my_binary.copy()
+    output_ds['leo_FtAmp_ch0'] = (('index'), leo_PkHt_ch0)
+    output_ds['leo_FtAmp_ch4'] = (('index'), leo_PkHt_ch4)
+    output_ds['leo_Base_ch0'] = (('index'), leo_base_ch0)
+    output_ds['leo_Base_ch4'] = (('index'), leo_base_ch4)
+
+    return output_ds
+
diff --git a/pysp2/util/particle_properties.py b/pysp2/util/particle_properties.py
index 04ca08f..c4a4a5c 100644
--- a/pysp2/util/particle_properties.py
+++ b/pysp2/util/particle_properties.py
@@ -3,10 +3,12 @@
 import pandas as pd
 import warnings
 
+from .leo_fit import beam_shape, leo_fit
+
 from .DMTGlobals import DMTGlobals
 
 
-def calc_diams_masses(input_ds, debug=True, factor=1.0, Globals=None):
+def calc_diams_masses(input_ds, debug=True, factor=1.0, Globals=None, leo_fits=False):
     """
     Calculates the scattering and incandescence diameters/BC masses for each
     particle.
@@ -22,6 +24,8 @@
     Globals: DMTGlobals structure or None
         DMTGlobals structure containing calibration coefficients. Set to
         None to use default values for MOSAiC.
+    leo_fits: boolean
+        Default is False. If True, LEO fits are calculated as well by first
+        calling beam_shape() and then leo_fit() on the input Dataset.
 
     Returns
     -------
@@ -34,6 +38,9 @@
     rejectFtPosTotal = 0
     if Globals is None:
         Globals = DMTGlobals()
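+    #optional LEO branch: beam_shape() derives the laser beam statistics
+    #(the "leo_" variables) from scattering-only particles, and leo_fit()
+    #turns them into leading-edge amplitude estimates for every event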
peak height = %d" % rejectMinIncandTotal) + print("Number of incandescence particles rejected for peak width = %d" % rejectIncandWidthTotal) + Scat_not_sat = 1e-18 * (Globals.c0Scat1 + Globals.c1Scat1*PkHt_ch0 + Globals.c2Scat1*PkHt_ch0**2) Scat_sat = 1e-18 * (Globals.c0Scat2 + Globals.c1Scat2*PkHt_ch4 + Globals.c2Scat2*PkHt_ch4**2) @@ -101,6 +117,16 @@ def calc_diams_masses(input_ds, debug=True, factor=1.0, Globals=None): output_ds['ScatDiaBC50'] = xr.DataArray(1000*(0.013416 + 25.066*(Scatter**0.18057)), dims=['index']) + if leo_fits: + leo_PkHt_ch0 = input_ds['leo_FtAmp_ch0'] + leo_PkHt_ch4 = input_ds['leo_FtAmp_ch4'] + leo_Scat_not_sat = 1e-18 * (Globals.c0Scat1 + Globals.c1Scat1*leo_PkHt_ch0 + Globals.c2Scat1*leo_PkHt_ch0**2) + leo_Scat_sat = 1e-18 * (Globals.c0Scat2 + Globals.c1Scat2*leo_PkHt_ch4 + Globals.c2Scat2*leo_PkHt_ch4**2) + leo_Scatter = np.where(PkHt_ch0 < Globals.ScatMaxPeakHt1, leo_Scat_not_sat, leo_Scat_sat) + #leo_Scatter = np.where(accepted_incand, leo_Scatter, np.nan) + output_ds['leo_ScatDiaSO4'] = xr.DataArray(1000 * (-0.015256 + 16.835 * leo_Scatter ** 0.15502), + dims=['index']) + sootMass_not_sat = factor * 1e-3 * ( Globals.c0Mass1 + Globals.c1Mass1*PkHt_ch1 + Globals.c2Mass1*PkHt_ch1**2) sootDiam_not_sat = (sootMass_not_sat/(0.5236e-9*Globals.densityBC))**(1./3.) @@ -122,7 +148,8 @@ def calc_diams_masses(input_ds, debug=True, factor=1.0, Globals=None): return output_ds -def process_psds(particle_ds, hk_ds, config, deltaSize=0.005, num_bins=199, avg_interval=10,deadtime_correction=False): +def process_psds(particle_ds, hk_ds, config, deltaSize=0.005, num_bins=199, + avg_interval=10,deadtime_correction=False, leo_fits=False): """ Processes the Scattering and BC mass size distributions: @@ -219,7 +246,9 @@ def process_psds(particle_ds, hk_ds, config, deltaSize=0.005, num_bins=199, avg_ MassScat2total = np.zeros_like(time_bins[:-1]) MassIncand2 = np.zeros_like(time_bins[:-1]) MassIncand2Sat = np.zeros_like(time_bins[:-1]) - + if leo_fits: + leo_IncandScatNumEnsemble = np.zeros((len(time_bins[:-1]), num_bins, num_bins)) + for t in range(len(time_bins) - 1): parts_time = np.logical_and( time_wave >= time_bins[t], time_wave <= time_bins[t + 1]) @@ -262,6 +291,25 @@ def process_psds(particle_ds, hk_ds, config, deltaSize=0.005, num_bins=199, avg_ np.add.at(IncanMassEnsemble[t,:], ind, np.multiply( OneOfEvery[the_particles_incan], sootMass[the_particles_incan])) + if leo_fits: + leo_ScatDiaSO4 = particle_ds['leo_ScatDiaSO4'].values / 1000. 
+        if leo_fits:
+            leo_ScatDiaSO4 = particle_ds['leo_ScatDiaSO4'].values / 1000.
+            leo_scatter_accept = ~np.isnan(leo_ScatDiaSO4)
+
+            the_particles = np.logical_and.reduce((parts_time, incand_accept, leo_scatter_accept))
+            #Remove oversize particles (incand)
+            the_particles = np.logical_and.reduce(
+                (the_particles, SizeIncandOnly < SpecSizeBins[-1] + deltaSize / 2))
+            #Remove oversize particles (leo scatter size)
+            the_particles = np.logical_and.reduce(
+                (the_particles, leo_ScatDiaSO4 < SpecSizeBins[-1] + deltaSize / 2))
+            ind_leo = np.searchsorted(SpecSizeBins + deltaSize / 2,
+                                      leo_ScatDiaSO4[the_particles], side='right')
+            ind_incan = np.searchsorted(SpecSizeBins + deltaSize / 2,
+                                        SizeIncandOnly[the_particles], side='right')
+
+            #axis 0 = time, axis 1 = incandescence size, axis 2 = leo size
+            np.add.at(leo_IncandScatNumEnsemble[t,:,:], (ind_incan, ind_leo), OneOfEvery[the_particles])
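+            #np.add.at does an unbuffered in-place add, so particles that
+            #fall into the same (core, shell) bin within one averaging
+            #interval all accumulate instead of overwriting each other
+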
 
         scat_parts = np.logical_and(scatter_accept, parts_time)
         incan_parts = np.logical_and(incand_accept, parts_time)
         #ConcIncanCycle = OneOfEvery * np.sum(incan_parts)
@@ -335,6 +383,8 @@
         ScatMassEnsembleBC[t, :] = ScatMassEnsembleBC[t, :] / FlowCycle #Not in use, always zero
         ScatNumEnsemble[t, :] = ScatNumEnsemble[t, :] / FlowCycle
         ScatMassEnsemble[t, :] = ScatMassEnsemble[t, :] / FlowCycle
+        if leo_fits:
+            leo_IncandScatNumEnsemble[t,:,:] = leo_IncandScatNumEnsemble[t,:,:] / FlowCycle
 
     base_time = pd.Timestamp('1904-01-01')
@@ -450,5 +500,29 @@
                          'IncanMassEnsemble': IncanMassEnsemble,
                          'ScatNumEnsembleBC': ScatNumEnsembleBC,
                          'NumFracBC': NumFracBC})
+    if leo_fits:
+        #Make the DataArray with differently named size dimensions
+        leo_IncandScatNumEnsemble = xr.DataArray(leo_IncandScatNumEnsemble,
+                                                 dims=('time', 'incand_bins', 'leo_bins'))
+        leo_IncandScatNumEnsemble.attrs["long_name"] = "2D incandescence size (black carbon core) and leo size (shell) \
+            number size distributions. Dimensions are (time, incandescence size, leo size)"
+        leo_IncandScatNumEnsemble.attrs["standard_name"] = "2D core / shell size number size distributions"
+        leo_IncandScatNumEnsemble.attrs["units"] = "cm-3 per bin"
+
+        #Add new dimensions (xarray wants different dimension names although
+        #the bin centers are the same)
+        IncandSizeBins = xr.DataArray(SpecSizeBins, dims=('incand_bins'))
+        IncandSizeBins.attrs["long_name"] = "Incandescence (core) size bin centers"
+        IncandSizeBins.attrs["standard_name"] = "particle_diameter"
+        IncandSizeBins.attrs["units"] = "um"
+
+        LeoSizeBins = xr.DataArray(SpecSizeBins, dims=('leo_bins'))
+        LeoSizeBins.attrs["long_name"] = "Leo (shell) size bin centers"
+        LeoSizeBins.attrs["standard_name"] = "particle_diameter"
+        LeoSizeBins.attrs["units"] = "um"
+
+        #add the dimensions to the Dataset
+        psd_ds = psd_ds.expand_dims(dim={'incand_bins': IncandSizeBins, 'leo_bins': LeoSizeBins})
+        #add the 2D (core, shell) variable
+        psd_ds['leo_IncandScatNumEnsemble'] = leo_IncandScatNumEnsemble
 
     return psd_ds
diff --git a/pysp2/util/peak_fit.py b/pysp2/util/peak_fit.py
index 9c75d2d..86f38e2 100644
--- a/pysp2/util/peak_fit.py
+++ b/pysp2/util/peak_fit.py
@@ -1,7 +1,7 @@
 import numpy as np
 import time
 import dask.bag as db
-from multiprocessing import Pool
+from multiprocessing import Pool, get_context
 from itertools import repeat
 from scipy.optimize import curve_fit
@@ -176,17 +176,16 @@
         proc_records = the_bag.map(fit_record).compute()
     #use multiprocessing.Pool to do the curve fits in parallel
     elif parallel == 'multiprocessing':
-        with Pool() as pool:
+        #explicit "fork" start method (POSIX only); the large chunksize cuts
+        #task-dispatch overhead
+        with get_context("fork").Pool() as pool:
             proc_records = pool.starmap(_do_fit_records, zip(repeat(my_ds),
                                         range(num_records),
-                                        repeat(num_trig_pts)))
+                                        repeat(num_trig_pts)), chunksize=2000)
     #else, no parallelism
     else:
         proc_records = []
         for i in range(num_records):
             proc_records.append(_do_fit_records(my_ds, i, num_trig_pts))
-
     FtAmp = np.stack([x[0] for x in proc_records])
     FtPos = np.stack([x[1] for x in proc_records])
     Base = np.stack([x[2] for x in proc_records])
@@ -604,3 +603,4 @@
         'height': height,
         'error': error}
     return fit_coeffs
+
diff --git a/tests/test_gfit.py b/tests/test_gfit.py
index 61a8b94..567d2ef 100644
--- a/tests/test_gfit.py
+++ b/tests/test_gfit.py
@@ -34,7 +34,15 @@ def test_psds():
     np.testing.assert_almost_equal(my_psds['ScatMassEnsemble'].sum(), 3.15026266)
     np.testing.assert_almost_equal(my_psds['IncanMassEnsemble'].sum(), 0.08280955)
     np.testing.assert_almost_equal(my_binary['DeadtimeRelativeBias'].mean(), -0.00023515, decimal=5)
-    coeff, beam_profile = pysp2.util.beam_shape(
-        my_binary, beam_position_from='peak maximum', Globals=pysp2.util.DMTGlobals())
-    np.testing.assert_almost_equal(coeff, [9.83851858e-01, 4.64317390e+01,
-                                           1.14337852e+01, 4.46761788e-03])
+
+    my_binary = pysp2.util.beam_shape(my_binary,
+                                      beam_position_from='peak maximum',
+                                      Globals=pysp2.util.DMTGlobals())
+    np.testing.assert_almost_equal(my_binary['leo_PkPos_ch0'].mean(), 46.47738538)
+
+    my_binary = pysp2.util.beam_shape(my_binary,
+                                      beam_position_from='split point',
+                                      Globals=pysp2.util.DMTGlobals())
+    np.testing.assert_almost_equal(my_binary['leo_PkPos_ch4'].mean(), 46.75399774)
+
\ No newline at end of file