Commit b066493c authored by Maaijke Mevius

data reading in chunks for large (raw) data files

parent a4afdcc2
Pipeline #49615 passed
@@ -169,7 +169,7 @@ def extract_root_metadata(dataset):
     metadata['OBSERVATION_END_UTC'] = metadata['OBSERVATION_END_UTC'].split(' ')[0]
     if not 'TARGET' in list(metadata.keys()):
         if 'TARGETS' in list(metadata.keys()):
-            metadata['TARGET']=metadata['TARGETS'][0]
+            metadata['TARGET']=metadata['TARGETS'][-1].split("_")[0]
     return metadata
@@ -517,9 +517,12 @@ def split_samples(dynspec_name,
     station_name, *_ = metadata['OBSERVATION_STATIONS_LIST']
     if 'DATA' in dataset[dynspec_name]:
-        data_array = dataset[dynspec_name]['DATA'][:, :, 0]
+        data_array = dataset[dynspec_name]['DATA']
+        nofch = 1 #TODO check if this is always the case for DYNSPEC data
     else:
-        data_array = dataset[dynspec_name]['BEAM_000']['STOKES_0'][:, :]
+        data_array = dataset[dynspec_name]['BEAM_000']['STOKES_0']
+        #take median over channels for raw data
+        nofch = dataset[dynspec_name]['BEAM_000']['STOKES_0'].attrs['NOF_CHANNELS'][0]
     averaging_window_in_samples = int(numpy.ceil(averaging_window / time_delta))
     averaging_window_in_seconds = averaging_window_in_samples * time_delta
     S4_60s_window_in_samples = int(60. / time_delta)
@@ -533,7 +536,6 @@ def split_samples(dynspec_name,
     time_obs = numpy.linspace(obs_start_time.timestamp(), obs_end_time.timestamp(),
                               total_time_samples)
     n_samples = int((end_obs_datetime - start_obs_datetime).seconds // sample_window)
-
     for i in range(n_samples):
         start_sample_datetime = round_down_datetime(
             start_obs_datetime + timedelta(seconds=sample_window * i),
@@ -567,10 +569,21 @@ def split_samples(dynspec_name,
         compute_start_end_azimuth_elevation(sample_info)
         sample_rate = int(
             averaging_window_in_samples * 3. / averaging_window_in_seconds)
+        logging.info("taking median over %d channels",nofch)
+        if nofch>1:
+            nsb = data_array.shape[1]//nofch
+            tmp_data = data_array[start_index:end_index]
+            #fill again in loop since otherwise some data is filled with zeros, not understood why
+            for ich in range(nofch):
+                tmp_data[:,ich*nsb:(ich+1)*nsb] = data_array[start_index:end_index,ich*nsb:(ich+1)*nsb]
+            tmp_data = numpy.median(tmp_data.reshape((-1,nsb,nofch)),axis=-1)
+        else:
+            tmp_data = data_array[start_index:end_index]
+        tmp_data = tmp_data.squeeze()
         frequency_axis = numpy.linspace(start_frequency, end_frequency,
-                                        data_array.shape[1])
+                                        tmp_data.shape[1])
         filtered_data, flags, flux, bandpass = apply_bandpass(
-            data_array[start_index:end_index], frequency_axis,
+            tmp_data, frequency_axis,
             freqaxis=1, timeaxis=0, target=metadata["TARGET"],
             sample_rate=sample_rate,
             # sample every 3 seconds
@@ -590,8 +603,7 @@ def split_samples(dynspec_name,
                                        axis=0, has_nan=False)
         averaged_data_array, time_axis, frequency_axis, flags, bandpass = \
             create_averaged_dataset(sample_info,
-                                     data_array[
-                                         start_index:end_index],
+                                     tmp_data,
                                      flags,
                                      bandpass) # make sure the data is shaped to contain integer of window
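
Note on the change at lines 517-520: data_array is now kept as a handle to the on-disk HDF5 dataset instead of being sliced into memory up front, so each per-sample slice data_array[start_index:end_index] reads only that time window from disk. A minimal sketch of the idea, not the repository's code, assuming h5py and a hypothetical file name and group name ("dynspec.h5", "DYNSPEC_000") modelled on the paths in the diff:

# Sketch only: defer reading a large dynamic spectrum by keeping the
# h5py.Dataset handle and slicing it per time window.
# File name, group name and window length below are hypothetical.
import h5py

with h5py.File("dynspec.h5", "r") as dataset:
    stokes = dataset["DYNSPEC_000"]["BEAM_000"]["STOKES_0"]  # handle only, no data read yet
    nofch = stokes.attrs["NOF_CHANNELS"][0]                  # channels per subband (from the diff)
    n_time = stokes.shape[0]
    window = 1000                                            # hypothetical chunk length in samples
    for start in range(0, n_time, window):
        chunk = stokes[start:start + window, :]              # only this slice is read from disk
        print(chunk.shape, nofch)

Slicing an h5py dataset returns a NumPy array containing only the requested region, which is what keeps the per-sample processing loop memory bounded for large raw files.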
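Note on the new block at lines 572-582: for raw STOKES_0 data each subband is split into NOF_CHANNELS channels, and the new code collapses them back to one value per subband by reshaping the frequency axis to (nsb, nofch) and taking the median over the last axis. A standalone sketch of that reshape with made-up array sizes, assuming (as the reshape in the diff implies) that the channels of one subband are contiguous along the frequency axis:

# Sketch only: reduce nofch raw channels per subband to a single value
# with a median over the channel axis. Sizes are made up.
import numpy

ntime, nsb, nofch = 120, 488, 16
data = numpy.random.rand(ntime, nsb * nofch)   # (time, subband*channel)

per_subband = numpy.median(data.reshape(-1, nsb, nofch), axis=-1)
print(per_subband.shape)                        # -> (120, 488)

For DYNSPEC-style data the diff sets nofch = 1, so the median branch is skipped and the sliced data is passed on unchanged.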