solar_ml / Commits

Commit db4eb673, authored 2 years ago by Mattia Mancini
Parent: 5885ab5a

    Add download and compress data
Showing 2 changed files, with 8241 additions and 198 deletions:

  Untitled.ipynb                     +0     −198
  download_and_compress_data.ipynb   +8241  −0
Untitled.ipynb deleted (100644 → 0), +0 −198
%% Cell type:code id:9cd4d4bf tags:

```python
!pip install matplotlib
import numpy as np
from tqdm import tqdm_notebook

import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf

# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

import h5py

dataset = None
timerange = None
freqrange = None
ids = None
fpath = '/project/lofarsw/Data/Dynspec/dset.h5'

with h5py.File(fpath, 'r') as fin:
    in_dataset = fin['/data'][:]
    in_timerange = fin['/time_range'][:]
    in_freqrange = fin['/freq_range'][:]
    in_ids = fin['timestamps'][:]

# Keep only the samples whose dynamic spectra contain no NaNs.
g = np.where(~np.isnan(in_dataset.sum(axis=(1, 2))))

dataset = in_dataset[g]
timerange = in_timerange[g]
freqrange = in_freqrange[g]
ids = in_ids[g]
#avg = dataset[:, :, 404]

import time
from IPython import display
import os


class ConvVarAutoencoder(tf.keras.Model):
    """Convolutional variational autoencoder for dynamic spectra."""

    def __init__(self, latent_dim, imsize):
        super().__init__()
        self.latent_dim = latent_dim
        shape = imsize
        # The encoder outputs the mean and log-variance of the latent
        # Gaussian, concatenated: hence Dense(latent_dim + latent_dim).
        self.encoder = tf.keras.Sequential(
            [
                tf.keras.layers.InputLayer(input_shape=imsize),
                tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3),
                                       strides=(2, 2), activation='relu'),
                #tf.keras.layers.MaxPool2D(pool_size=(3,3), strides=(2, 2)),
                tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3),
                                       strides=(2, 2), activation='relu'),
                tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3),
                                       strides=(2, 2), activation='relu'),
                tf.keras.layers.Flatten(),
                # No activation
                tf.keras.layers.Dense(latent_dim + latent_dim),
            ]
        )
        rescaled = (shape[0] // 4, shape[1] // 4)
        self.decoder = tf.keras.Sequential(
            [
                tf.keras.layers.InputLayer(input_shape=(latent_dim,)),
                tf.keras.layers.Dense(units=rescaled[0] * rescaled[1] * 32,
                                      activation=tf.nn.relu),
                tf.keras.layers.Reshape(target_shape=(rescaled[0], rescaled[1], 32)),
                tf.keras.layers.Conv2DTranspose(
                    filters=32, kernel_size=(3, 3), strides=2, padding='same',
                    activation='relu'),
                tf.keras.layers.Conv2DTranspose(
                    filters=32, kernel_size=(3, 3), strides=2, padding='same',
                    activation='relu'),
                # No activation
                #tf.keras.layers.UpSampling2D(size=(3, 3), interpolation='nearest'),
                tf.keras.layers.Conv2DTranspose(
                    filters=1, kernel_size=(3, 3), strides=1, padding='same'),
            ]
        )

    @tf.function
    def sample(self, eps=None):
        if eps is None:
            eps = tf.random.normal(shape=(100, self.latent_dim))
        return self.decode(eps, apply_sigmoid=True)

    def encode(self, x):
        mean, logvar = tf.split(self.encoder(x), num_or_size_splits=2, axis=1)
        return mean, logvar

    def reparameterize(self, mean, logvar):
        # Reparameterization trick: z = mean + sigma * eps, with eps ~ N(0, I).
        eps = tf.random.normal(shape=mean.shape)
        return eps * tf.exp(logvar * .5) + mean

    def decode(self, z, apply_sigmoid=False):
        logits = self.decoder(z)
        if apply_sigmoid:
            probs = tf.sigmoid(logits)
            return probs
        return logits


optimizer = tf.keras.optimizers.Adam(1e-4)


def log_normal_pdf(sample, mean, logvar, raxis=1):
    log2pi = tf.math.log(2. * np.pi)
    return tf.reduce_sum(
        -.5 * ((sample - mean) ** 2. * tf.exp(-logvar) + logvar + log2pi),
        axis=raxis)


def compute_loss(model, x):
    # Single-sample Monte Carlo estimate of the negative ELBO:
    # -(log p(x|z) + log p(z) - log q(z|x)), where x is an (input, target) pair.
    mean, logvar = model.encode(x[0])
    z = model.reparameterize(mean, logvar)
    x_logit = model.decode(z)
    cross_ent = tf.nn.sigmoid_cross_entropy_with_logits(logits=x_logit, labels=x[1])
    logpx_z = -tf.reduce_sum(cross_ent, axis=[1, 2, 3])
    logpz = log_normal_pdf(z, 0., 0.)
    logqz_x = log_normal_pdf(z, mean, logvar)
    return -tf.reduce_mean(logpx_z + logpz - logqz_x)


@tf.function
def train_step(model, x, optimizer):
    """Executes one training step and returns the loss.

    This function computes the loss and gradients, and uses the latter to
    update the model's parameters.
    """
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))


epochs = 200
num_examples_to_generate = 16

# keeping the random vector constant for generation (prediction) so
# it will be easier to see the improvement.
random_vector_for_generation = tf.random.normal(
    shape=[num_examples_to_generate, latent_dim])
model = ConvVarAutoencoder(latent_dim, training_dset.shape[1:])
os.makedirs('test', exist_ok=True)


def generate_and_save_images(model, epoch, test_sample):
    mean, logvar = model.encode(test_sample)
    z = model.reparameterize(mean, logvar)
    predictions = model.sample(z)
    fig = plt.figure(figsize=(16, 16))

    # Top half of the grid: reconstructions; bottom half: the test inputs.
    # Both are transposed the same way, so both get time on x and freq on y.
    for i in range(0, 8):
        plt.subplot(4, 4, i + 1)
        plt.imshow(np.array(predictions[i, :, :, 0]).T, cmap='viridis',
                   origin='lower', aspect='auto', vmin=0, vmax=1)
        plt.xlabel('time')
        plt.ylabel('freq')
    for i in range(0, 8):
        plt.subplot(4, 4, i + 9)
        plt.imshow(np.array(test_sample[i, :, :, 0]).T, cmap='viridis',
                   origin='lower', aspect='auto', vmin=0, vmax=1)
        plt.xlabel('time')
        plt.ylabel('freq')

    # tight_layout minimizes the overlap between 2 sub-plots
    plt.savefig('test/image_at_epoch_{:04d}.png'.format(epoch))
    plt.show()


assert batch_size >= num_examples_to_generate
for test_batch in test_dataset.take(1):
    test_sample = test_batch[0][0:num_examples_to_generate, :, :, :]
```
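
As committed, the cell is not self-contained: `latent_dim`, `training_dset`, `batch_size`, and `test_dataset` are used but never defined, and although `epochs` and `train_step` are set up, no training loop is ever run. The sketch below shows one way the missing pieces could look, assuming the spectra in `dataset` are scaled to [0, 1] and split 90/10 into training and test sets; every name and value it introduces (`latent_dim = 16`, `batch_size = 32`, the split, the loop) is an illustrative assumption, not part of the commit, and the definitions would have to run before the lines of the cell that use them. Note also that the encoder halves each spatial dimension three times with 'valid' padding while the decoder upsamples only by a factor of four from `(shape[0] // 4, shape[1] // 4)`, so input and reconstruction shapes agree only for particular image sizes.

```python
# Hedged sketch of the definitions missing from the cell above; all names and
# values introduced here are assumptions, not part of the commit.
latent_dim = 16    # assumed latent-space size
batch_size = 32    # must satisfy the assert: batch_size >= num_examples_to_generate

# Scale the spectra to [0, 1] (the decoder emits per-pixel sigmoid logits)
# and add a trailing channel axis.
x = dataset.astype('float32')
x = (x - x.min()) / (x.max() - x.min())
x = x[..., np.newaxis]

# Assumed 90/10 train/test split.
split = int(0.9 * len(x))
training_dset, test_dset = x[:split], x[split:]

# compute_loss(model, x) reads x[0] as the input and x[1] as the target,
# so each dataset element is an (input, target) pair; for a plain
# autoencoder the two are the same array.
train_dataset = (tf.data.Dataset.from_tensor_slices((training_dset, training_dset))
                 .shuffle(len(training_dset))
                 .batch(batch_size))
test_dataset = (tf.data.Dataset.from_tensor_slices((test_dset, test_dset))
                .batch(batch_size))

# Training loop in the style of the TensorFlow CVAE tutorial this cell
# appears to follow (an assumption; the commit contains no loop).
for epoch in range(1, epochs + 1):
    for train_x in train_dataset:
        train_step(model, train_x, optimizer)
    generate_and_save_images(model, epoch, test_sample)
```

With `batch_size = 32` the cell's final `assert batch_size >= num_examples_to_generate` passes, and `test_sample` is then drawn from the first test batch exactly as the cell already does.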
download_and_compress_data.ipynb added (0 → 100644), +8241 −0
This diff is collapsed.