#!/usr/bin/env python
# The shebang above tells this script to run with the currently active Python environment,
# not the computer's local versions. Environment setup:
# module load anaconda/python3/2022.05
# conda activate /home/st838/nethome/condaenv
'''
Name: Sophie Turner.
Date: 19/1/2023.
Contact: st838@cam.ac.uk
Functions which are used by prediction scripts.
Files are located at scratch/st838/netscratch.
'''
import os
import sys
import math
import time
import glob
import psutil
import warnings
import numpy as np
from PIL import Image
import constants as con
import cartopy.crs as ccrs
import file_paths as paths
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import max_error, mean_squared_error, mean_absolute_percentage_error, r2_score

# Metadata functions.

def get_idx_names(name_file):
  '''Names of the fields, taken from metadata.
  name_file: file path containing array of fields' indices and names.
  Returns a list of [index, name] pairs.'''
  idx_names = []
  with open(name_file, 'r') as lines:
    for line in lines:
      words = line.replace('\n', '')
      words = words.split(maxsplit=1)
      idx_names.append([words[0], words[1]])
  return(idx_names)
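
# A minimal usage sketch. The file name and line format are assumptions, based only on the
# parsing above (each line holds an index followed by the rest of the name text):
#   idx_names = get_idx_names('metadata.txt')  # Hypothetical file of lines like '78 jo3_...'.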

# Data preprocessing functions.

def collate(training_data_path, input_files):
  '''Collate the data from multiple files, if it hasn't already been done.
  training_data_path: file path of where to save the combined data.
  input_files: list of training data file paths.'''
  if os.path.exists(training_data_path):
    days = np.load(training_data_path)
  else:
    # Memory usage limit.
    max_mem = 75 # Percentage.
    days = np.empty((con.n_fields, 0), dtype=np.float32)
    for i in range(len(input_files)):
      print(f'Adding day {i+1} to array.')
      start = time.time()
      npy_file = input_files[i]
      data = np.load(npy_file)
      days = np.append(days, data, axis=1)
      mem_used = round(psutil.virtual_memory().percent, 1)
      if mem_used > max_mem:
        sys.exit(f'The maximum memory usage limit of {max_mem}% was exceeded with a {i+1} day array of size {days.shape}.')
      end = time.time()
      seconds = round(end - start)
      gb = round(psutil.virtual_memory().used / 1000000000, 3)
      print(f'That day took {seconds} seconds.')
      print(f'Size of array: {days.shape}.')
      print(f'Memory used so far: {gb} GB.')
      print(f'Memory usage at {mem_used}%.')
    # Save the np array of training data for re-use.
    # The .npy should be deleted after final use to free up space as it is very large.
    print('Saving .npy.')
    start = time.time()
    np.save(training_data_path, days)
    end = time.time()
    seconds = round(end - start)
    print(f'That took {seconds} seconds.')
  # Return one np array containing data from multiple files.
  return(days)
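
# A usage sketch (the file names are hypothetical, and paths.npy is an assumed attribute):
#   input_files = sorted(glob.glob(f'{paths.npy}/2015*.npy'))
#   days = collate(f'{paths.npy}/training_data.npy', input_files)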

def find_cloud_end(data):
  '''Find out at what level there are no more clouds.
  data: np array of the contents of an entire .npy file of data, of shape (features, samples).
  Returns the first altitude at which there is no cloud.
  '''
  # For each grid & time point...
  n_points = len(data[0])
  # First altitude value.
  first_alt = data[1,0]
  # Stride = lats x lons to check each altitude.
  stride = 144 * 192
  for i in range(0, n_points, stride):
    alt = data[1,i]
    # The altitudes are repeated in the data structure due to flattening. Don't process the same ones again.
    if i != 0 and alt == first_alt:
      break
    # Indices of all data points at this altitude.
    layer = np.where(data[1] == alt)
    # Total cloud in this layer.
    cloud_in_layer = np.sum(data[con.cloud, layer])
    if cloud_in_layer == 0:
      break
  return(alt)

def sum_cloud(data, out_path=None):
  '''Sum the cloud in the column above each grid box and include that as a feature in training data.
  data: np array of the contents of an entire .npy file of data, of shape (features, samples).
  out_path: optional file path to save the new dataset at.
  '''
  print('Summing cloud columns.')
  # Check at which altitude clouds stop.
  clear = find_cloud_end(data)
  # Make a new array for the cloud column feature.
  col = np.zeros(data.shape[1], dtype=np.float32)
  # For each grid & time point...
  n_points = len(data[0])
  print('This will take a while. At least half an hour. Go for a walk or something.')
  for i in range(n_points):
    # Get the time, altitude, latitude and longitude.
    alt = data[con.alt,i]
    # Ignore any altitudes which are too high for clouds.
    if alt >= clear:
      continue
    lat = data[con.lat,i]
    lon = data[con.lon,i]
    dt = data[con.days,i] # date-time
    cloud = data[con.cloud,i]
    # Search only grid boxes after this one, not before.
    # Stride = lats x lons.
    stride = 144 * 192
    for j in range(i, n_points, stride):
      alt_j = data[con.alt,j]
      # Ignore any altitudes which are too high for clouds.
      if alt_j >= clear:
        break
      lat_j = data[con.lat,j]
      lon_j = data[con.lon,j]
      dt_j = data[con.days,j] # date-time
      # Get all the grid boxes where time, lat & lon are the same AND alt > this alt.
      if dt_j == dt and alt_j > alt and lat_j == lat and lon_j == lon:
        # Sum the cloud of those grid boxes.
        cloud_j = data[con.cloud,j]
        cloud += cloud_j
    # Add this number to the new feature at this grid box.
    col[i] = cloud
  # Add the new array as a feature row in the dataset, directly after the current cloud one.
  data = np.insert(data, con.cloud+1, col, axis=0)
  # Save the new dataset.
  if out_path is not None:
    print('Saving new dataset.')
    np.save(out_path, data)
  # Return the new dataset.
  return(data)
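
# A usage sketch, continuing from collate() (the file name is hypothetical and paths.npy is
# an assumed attribute):
#   days = sum_cloud(days, out_path=f'{paths.npy}/training_data_with_cloud_col.npy')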

def shape(rows):
  '''Make single input fields 2d so that they fit with other parameters going into the ML functions.
  rows: np array of fields chosen for training or testing.
  '''
  if rows.ndim == 1:
    rows = np.vstack((rows, ))
  return(rows)

def remove_all_zero(data):
  '''Remove all-zero fields from dataset.'''
  removes = []
  for i in range(len(data)):
    field = data[i]
    if np.all(field == 0):
      removes.append(i)
  data = np.delete(data, removes, axis=0)
  return(data)

def all_zeros(field):
  '''Check if a field contains only zeros.'''
  return(np.all(field == 0))

def get_name(idx, idx_names):
  '''Get the name of a field from its index number.
  idx: index of the field, as a string.
  idx_names: list of [index, name] pairs from get_idx_names().
  Returns the short name, or None if the index is not found.'''
  for idx_name in idx_names:
    if idx_name[0] == idx:
      name = idx_name[1].split()[1]
      name = name.split('_')[0]
      return(name)
  return(None)

def day_trop(data):
  '''Remove night and the upper stratosphere, where Fast-J shouldn't be used.
  data: 2d numpy array of flattened UM output data, of shape (features, samples).
  '''
  # Remove night.
  data = data[:, np.where(data[con.down_sw_flux] > 0)].squeeze()
  # Remove the upper portion where Fast-J might not work.
  data = data[:, np.where(data[con.pressure] > 20)].squeeze()
  return(data)

def split_pressure(data):
  '''Split the UKCA dataset into two: one above and one below the Fast-J cutoff, using pressure.
  data: 2d np array of whole UKCA dataset, of shape (features, samples).
  '''
  cutoff = 20 # Pascals.
  # Note: samples exactly at the cutoff are excluded from both splits.
  bottom = data[:, np.where(data[con.pressure] > cutoff)].squeeze()
  top = data[:, np.where(data[con.pressure] < cutoff)].squeeze()
  return(bottom, top)

def sample(data, size=None):
  '''Make a smaller dataset by randomly sampling the data, uniformly.
  Choose a big enough size to capture sufficient tropospheric density.
  To do: use a better function than random uniform sampling.
  data: np array of dataset, 1d or 2d of shape (features, samples).
  size: number of data points desired. Leave empty for auto (10%).
  '''
  if data.ndim == 1:
    length = len(data)
  else:
    length = len(data[0])
  if size is None:
    size = round(length / 10) # 10%.
  # Note: rng.integers samples with replacement, so indices can repeat.
  i = con.rng.integers(0, length, size, dtype=np.int32)
  if data.ndim == 1:
    data = data[i]
  else:
    data = data[:, i]
  return(data)
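
# A usage sketch: reduce a dataset (e.g. days from collate()) to a random 1% of its samples.
#   small = sample(days, size=round(days.shape[1] / 100))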

def in_out_swap(data, i_inputs=con.phys_main, i_targets=con.J_core):
  '''Select inputs and targets for ML and swap the dimensions over to make the data compatible with the random forest shape.
  data: 2d numpy array of flattened UKCA output data, of shape (features, samples).
  i_inputs, i_targets: optional 1d lists of ints, indices of input/target features.
  Returns data in shape (samples, features).
  '''
  # Input data.
  inputs = data[i_inputs]
  if inputs.ndim == 1:
    inputs = inputs.reshape(1, -1)
  inputs = np.swapaxes(inputs, 0, 1)
  print('Inputs:', inputs.shape)
  # Target data.
  targets = data[i_targets]
  if targets.ndim > 1:
    targets = np.swapaxes(targets, 0, 1)
  print('Targets:', targets.shape)
  return(inputs, targets)

def tts(inputs, targets, test_size=0.1):
  '''Random but sorted train-test split function which returns indices for coords.
  inputs: 2d np array of input data, of shape (samples, features).
  targets: 2d np array of target data, of shape (samples, features).
  test_size: float from 0 to 1, proportion of data to use for the test set.
  Returns TTS data and indices of the split, in shape (samples, features).
  '''
  # Find the length of the data and the size of the test set.
  len_all = len(inputs)
  len_test = round(len_all * test_size)
  i_all = np.arange(0, len_all, dtype=int)
  # Generate random, unique indices for selecting the test set.
  i_test = con.rng.choice(i_all, len_test, replace=False)
  # Put them back into the right order.
  i_test = np.sort(i_test)
  # Make the test set.
  in_test = inputs[i_test]
  out_test = targets[i_test]
  # Remove these from the training set.
  in_train = np.delete(inputs, i_test, axis=0)
  out_train = np.delete(targets, i_test, axis=0)
  # Return train and test datasets and their indices from the full dataset.
  return(in_train, in_test, out_train, out_test, i_test)

# ML functions.

def train(in_train, out_train):
  '''Set up and fit a simple linear regression model.'''
  model = linear_model.LinearRegression()
  model.fit(in_train, out_train)
  return(model)

def sMAPE(out_test, out_pred):
  '''Compute the symmetric mean absolute percentage error.
  Like MAPE but handles values close to zero better.
  out_test: array of test targets. out_pred: array of prediction outputs.
  '''
  warnings.filterwarnings('ignore') # Division by zero when both values are zero.
  n = len(out_test)
  diffs = 0
  for i in range(n):
    diffs += abs(out_test[i] - out_pred[i]) / (out_test[i] + out_pred[i])
  smape = (100 / n) * diffs
  return(smape)
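
# The loop above computes sMAPE = (100/n) * sum(|t_i - p_i| / (t_i + p_i)). For the non-negative
# J rates here this is the textbook sMAPE without its factor of 2, so scores range from 0 to 100.
# An equivalent vectorised form (a reference sketch, not used above):
#   smape = 100 * np.mean(np.abs(out_test - out_pred) / (out_test + out_pred))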

def metrics_2d(out_test, out_pred):
  '''Get metrics over 2 dimensions for metrics which can only be used on 1 dim.
  out_test, out_pred: 2d arrays of shape (samples, targets).
  '''
  n = len(out_test[0])
  maxes = [] # Array of max errors for each target.
  smape = 0 # Average percentage error.
  for target in range(n):
    target_test = out_test[:,target].squeeze()
    target_pred = out_pred[:,target].squeeze()
    maxe = max_error(target_test, target_pred)
    maxes.append(maxe)
    smape += sMAPE(target_test, target_pred)
  smape = smape / n
  return(maxes, smape)

def test(model, in_test, out_test):
  '''Test a trained model and measure its performance.
  model: fitted sklearn model.
  in_test, out_test: test inputs and targets.
  Returns the predictions and error metrics.'''
  # Try it out.
  out_pred = model.predict(in_test)
  # See how well it did.
  if out_test.ndim == 1:
    maxe = max_error(out_test, out_pred)
    smape = sMAPE(out_test, out_pred)
  else:
    maxe, smape = metrics_2d(out_test, out_pred)
  mse = mean_squared_error(out_test, out_pred)
  mape = mean_absolute_percentage_error(out_test, out_pred)
  r2 = round(r2_score(out_test, out_pred), 3)
  return(out_pred, maxe, mse, mape, smape, r2)
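
# A usage sketch of the whole pipeline on a dataset loaded above (names are illustrative):
#   inputs, targets = in_out_swap(days)
#   in_train, in_test, out_train, out_test, i_test = tts(inputs, targets)
#   model = train(in_train, out_train)
#   out_pred, maxe, mse, mape, smape, r2 = test(model, in_test, out_test)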

def only_range(inputs, targets, targets_all, bottom=0, top=1, keep=True, min_samples=10):
  '''Only use a certain range of values of J rates.
  Call just before fns.test() and pass only test sets if you want the model trained on all data but only tested on the smallest.
  inputs: array of inputs (in_test for test-only).
  targets: array of targets (out_test for test-only).
  targets_all: array of all targets in the data, used for the test-only option. If using all data, just pass the targets for both target params.
  bottom: chosen proportion of data for the lowest value, 0 to 1, inclusive.
  top: chosen proportion of data for the highest value, 0 to 1, inclusive.
  keep: whether to keep the selected portion and remove the rest (True), or remove the selected portion and keep the rest (False).
  min_samples: minimum number of samples a J rate needs for it to be included.
  '''
  assert bottom < top
  # Indices of J rates removed for having too few samples in the range.
  deletes = np.array([], dtype=int)
  if targets.ndim == 1:
    top_all = max(targets_all)
    bottom_new = top_all * bottom
    top_new = top_all * top
    i_range = np.where((targets >= bottom_new) & (targets <= top_new))
    # Reduce the data to these indices.
    if keep:
      inputs = inputs[i_range]
      targets = targets[i_range]
    else:
      inputs = np.delete(inputs, i_range, axis=0)
      targets = np.delete(targets, i_range)
  else:
    # For each target feature...
    for i in range(len(targets_all[0])):
      target = targets_all[:, i]
      # Get the range for this target.
      top_all = max(target)
      bottom_new = top_all * bottom
      top_new = top_all * top
      i_range = np.where((targets[:, i] >= bottom_new) & (targets[:, i] <= top_new))
      if keep:
        # Sometimes there are no data in this region for some J rates.
        if len(i_range[0]) < min_samples:
          deletes = np.append(deletes, i)
        else:
          # Reduce the data to these indices.
          inputs = inputs[i_range]
          targets = targets[i_range]
      else:
        inputs = np.delete(inputs, i_range, axis=0)
        targets = np.delete(targets, i_range, axis=0)
    if keep:
      # Remove these J rates from the selection.
      targets = np.delete(targets, deletes, axis=1)
  return(inputs, targets, deletes)
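
# A usage sketch: train on all the data but test only on the smallest 10% of each J rate.
#   in_small, out_small, deletes = only_range(in_test, out_test, targets, bottom=0, top=0.1)
#   out_pred, maxe, mse, mape, smape, r2 = test(model, in_small, out_small)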

# Results & postprocessing functions.

def load_model_data(mod_name='rf'):
  '''Loads and returns input, target and prediction sample data from a trained ML model.
  mod_name: string, name of the ML model and its folder.
  '''
  # File paths.
  mod_path = f'{paths.mod}/{mod_name}/{mod_name}'
  inputs_path = f'{mod_path}_inputs.npy'
  targets_path = f'{mod_path}_targets.npy'
  preds_path = f'{mod_path}_pred.npy'
  # Load data.
  print('\nLoading data.')
  inputs = np.load(inputs_path)
  targets = np.load(targets_path)
  preds = np.load(preds_path)
  print('Inputs:', inputs.shape)
  print('Targets:', targets.shape)
  print('Preds:', preds.shape)
  return(inputs, targets, preds)

def make_cols_map(inputs, targets, preds, j_idx=None, out_path=None):
  '''Make an array of column performance.
  inputs, targets, preds: 2d numpy arrays of ML model data, of shape (samples, features).
  j_idx: int or None, index in the target data of which J rate to look at. If None, use the average of all.
  out_path: string or None, file path to save the map array as.
  Returns the map as a 2d numpy array of [lat, lon, r2, diff] for each column.
  '''
  warnings.filterwarnings('ignore') # R2 score on 1 data point.
  # Get the unique lat & lon values.
  lats = np.unique(inputs[:, con.lat])
  lons = np.unique(inputs[:, con.lon])
  # Make a 2d list of coords, R2 scores and % differences for each lat and lon point.
  grid = []
  # For each lat...
  for i in range(len(lats)):
    lat = lats[i]
    # And each lon...
    for j in range(len(lons)):
      lon = lons[j]
      # Get the indices of all the data in that column.
      idx = np.where((inputs[:, con.lat] == lat) & (inputs[:, con.lon] == lon))
      idx = idx[0]
      # Get all the target J rates in that column.
      target = targets[idx, j_idx].squeeze()
      # There might not be a data point at this location. Skip if so.
      if not np.any(target) or target.ndim == 0:
        continue
      # Get the predicted J rates in the column.
      pred = preds[idx, j_idx].squeeze()
      # Get the R2 score and % diff.
      r2 = r2_score(target, pred)
      diff = np.nan_to_num(((np.mean(pred) - np.mean(target)) / np.mean(target)) * 100, posinf=0, neginf=0)
      # Save the scores in the 2d list for every lat & lon.
      grid.append([lat, lon, r2, diff])
  grid = np.array(grid)
  print(grid.shape)
  if out_path is not None:
    # Save the array because it takes ages to make.
    np.save(out_path, grid)
  return(grid)
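
# A plotting sketch for the grid above, using cartopy (imported as ccrs but otherwise unused
# in this module); the column layout follows the [lat, lon, r2, diff] rows returned above:
#   grid = make_cols_map(inputs, targets, preds, j_idx=0)
#   ax = plt.axes(projection=ccrs.PlateCarree())
#   ax.coastlines()
#   plt.scatter(grid[:, 1], grid[:, 0], c=grid[:, 2], transform=ccrs.PlateCarree())
#   plt.colorbar(label='R2 score')
#   plt.show()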

# Plot functions.

def shrink(out_test, out_pred):
  '''Don't plot an unnecessary number of data points i.e. >10000.'''
  # Make them the right shape.
  out_test = out_test.squeeze()
  out_pred = out_pred.squeeze()
  length = len(out_pred)
  if length > 10000:
    # Plotting this many data points is excessive and costly. Reduce it to 10000.
    # Use int32: int16 would overflow for arrays longer than 32767.
    idxs = np.linspace(0, length-1, 10000, dtype=np.int32)
    out_test = out_test[idxs]
    out_pred = out_pred[idxs]
    del(idxs)
    # Choose the opacity of points.
    alpha = 0.05
  elif length > 100:
    alpha = 0.1
  else:
    alpha = 1
  return(out_test, out_pred, alpha)

def force_axes():
  '''Make the plot axes exactly the same.'''
  plt.axis('square')
  xticks, xlabels = plt.xticks()
  yticks, ylabels = plt.yticks()
  plt.axis('auto')
  if len(yticks) > len(xticks):
    tix = yticks
    lab = ylabels
  else:
    tix = xticks
    lab = xlabels
  plt.xticks(ticks=tix, labels=lab)
  plt.yticks(ticks=tix, labels=lab)

def line(xdata, ydata, xdata2=None, ydata2=None, title='', xlab='', ylab='', leg1='', leg2=''):
  '''Show a line plot of 2 vars.
  xdata, ydata: arrays to plot.
  xdata2, ydata2: optional 2nd line to plot.
  title: optional title text.
  xlab, ylab: optional axis text.
  leg1, leg2: optional legend labels if using 2 lines.
  '''
  plt.plot(xdata, ydata, label=leg1)
  if xdata2 is not None:
    plt.plot(xdata2, ydata2, label=leg2)
    # Only show a legend when there are 2 lines to distinguish.
    plt.legend()
  plt.title(title)
  plt.xlabel(xlab)
  plt.ylabel(ylab)
  plt.show()
  plt.close()

def show(out_test, out_pred, maxe=None, mse=None, mape=None, smape=None, r2=None, j='', t=None):
  '''Show a scatter plot of predictions against targets, with any supplied metrics.
  out_test, out_pred: arrays of test targets and prediction outputs.
  maxe, mse, mape, smape, r2: optional metrics to display.
  j: optional name of the J rate. t: optional plot title.
  '''
  if maxe is not None: print(f'MaxE = {maxe}')
  if mse is not None: print(f'MSE = {mse}')
  if mape is not None: print(f'MAPE = {mape}')
  if smape is not None: print(f'SMAPE = {smape}')
  if t is None: t = j
  # Don't plot >10000 points.
  out_test, out_pred, a = shrink(out_test, out_pred)
  plt.scatter(out_test, out_pred, alpha=a)
  # Make a 1:1 reference line.
  plt.plot(out_test, out_test, linestyle=':', color='grey', alpha=0.5)
  # Force the axes to be identical.
  force_axes()
  if r2 is not None:
    # Only round r2 when it is supplied, to avoid round(None).
    r2 = round(r2, 3)
    print(f'Coefficient of determination = {r2}')
    plt.title(f'{t} J rates. {con.r2}={r2}')
  plt.xlabel(f'{j} J rate from Fast-J / {con.pers}')
  plt.ylabel(f'{j} J rate from random forest / {con.pers}')
  plt.show()
  plt.close()

def show_col_orig(data, ij=78, name='O3', all_time=True):
  '''Show a column of J rate by altitude for full UKCA datasets before ML.
  data: 2d np array of the full dataset.
  ij: index of the J rate to look at. 78=O3 in the full dataset.
  name: name of the J rate.
  all_time: whether to include all time steps (True) or plot a line of one time point (False).
  '''
  # Pick a lat-lon point and a time.
  # Cambridge at midday.
  lat, lon, hour = 51.875, 0.9375, 12
  # Choose whether to specify the time point.
  if not all_time:
    # Find the first occurrence of this time so that we only select one day.
    data = data[:, np.where(data[con.hour] == hour)].squeeze()
    dt0 = np.unique(data[con.days])[0]
    data = data[:, np.where(data[con.days] == dt0)].squeeze()
  # Fetch this grid col's data.
  data = data[:, np.where((data[con.lat] == lat) & (data[con.lon] == lon))].squeeze()
  # Pick out the J rate and altitude.
  j = data[ij]
  alts = data[con.alt]
  # Convert proportion to km.
  alts = alts * 85
  # Plot the J rate by altitude in that column.
  if all_time:
    plt.scatter(j, alts, label=f'J{name} from UKCA', alpha=0.2)
    plt.title('UKCA column ozone J rates over Cambridge in 2015')
  else:
    plt.plot(j, alts, label=f'J{name} from UKCA')
    plt.title('UKCA column ozone J rates over Cambridge at midday on 15/1/2015')
  plt.xlabel(f'{con.rxnO3} J rate')
  plt.ylabel('Altitude / km')
  plt.show()
  plt.close()

def col(data, coords, lat, lon, hour, ij):
  '''Get a column of J rate by altitude, to compare targets and predictions.
  data: 2d np array dataset e.g. full UKCA data or ML targets.
  coords: 2d np array of time & space co-ordinates for the dataset.
  lat, lon: latitude and longitude of the chosen column.
  hour: hour of day of the chosen sample, or None if using all time points.
  ij: index of the J rate to look at.
  Returns the J rates and altitudes (in km) of the column.
  '''
  # Stick the coords on for easier processing.
  if data.ndim == 1:
    data = np.expand_dims(data, 1)
  data = np.append(coords, data, axis=1)
  if hour is not None:
    # Find the first occurrence of this time so that we only select one day.
    data = data[np.where(data[:, con.hour] == hour)].squeeze()
    dt0 = np.unique(data[:, con.days])[0]
    data = data[np.where(data[:, con.days] == dt0)].squeeze()
  # Fetch this grid col's data.
  data = data[np.where((data[:, con.lat] == lat) & (data[:, con.lon] == lon))].squeeze()
  # Pick out the J rate and altitude.
  ij += 5 # Account for the added coord columns.
  j = data[:, ij]
  alts = data[:, con.alt]
  alts = alts * 85 # Convert proportion to km.
  return(j, alts)

def show_col(out_test, out_pred, coords, ij, name='O3', all_time=True):
  '''Show a column of J rate by altitude.
  out_test: 2d np array dataset of targets.
  out_pred: 2d np array dataset of predictions.
  coords: 2d np array of time & space co-ordinates for the datasets.
  ij: index of the J rate to look at.
  name: name of the J rate.
  all_time: whether to include all time steps (True) or plot a line of one time point (False).
  '''
  # Pick a lat-lon point and a time.
  lat, lon, hour = 51.875, 0.9375, 12 # Cambridge at midday.
  # Swap the dimensions to make them compatible with sklearn.
  coords = np.swapaxes(coords, 0, 1)
  # Choose whether to specify the time point.
  if all_time:
    hour = None
  j1, alts = col(out_test, coords, lat, lon, hour, ij)
  j2, alts = col(out_pred, coords, lat, lon, hour, ij)
  # Plot the J rate by altitude in that column.
  if all_time:
    plt.scatter(j1, alts, label=f'J{name} from UKCA', marker='|', s=50, alpha=0.5)
    plt.scatter(j2, alts, label=f'J{name} from random forest', marker='_', s=50, alpha=0.5)
    plt.title('UKCA J rates, column over Cambridge in 2015')
  else:
    plt.plot(j1, alts, label=f'J{name} from UKCA')
    plt.plot(j2, alts, label=f'J{name} from random forest')
    plt.title(f'UKCA J{name}, column over Cambridge at midday on 15/7/2015')
  plt.legend()
  plt.xlabel(f'J{name} / {con.pers}')
  plt.ylabel('Altitude / km')
  plt.show()
  # Column percentage difference plot.
  diff = np.nan_to_num(((j2 - j1) / j1) * 100, posinf=0, neginf=0) # Accounts for division by zero.
  if all_time:
    plt.hist(diff, bins=100)
  else:
    plt.plot(diff, alts)
  # Mark zero difference.
  plt.axvline(linestyle=':')
  plt.title(f'Column % difference of J{name} predictions to UKCA outputs')
  plt.xlabel('% difference')
  plt.ylabel('Altitude / km')
  plt.show()
  plt.close()

def make_gif(dir_path, ext='png', gif_name='GIF', ms=500):
  '''Makes a GIF file from a directory containing the desired images, and saves it in that directory.
  Uses all images in the dir with the specified extension.
  dir_path: string, file path to the desired directory.
  ext: optional string, file type extension of the desired images.
  gif_name: optional string, file name of the resultant GIF.
  ms: int, milliseconds spent on each frame (500 = half a second per image).
  '''
  img_paths = sorted(glob.glob(f'{dir_path}/*.{ext}'))
  frames = [Image.open(image) for image in img_paths]
  frame1 = frames[0]
  # Skip the first frame in append_images because it is already the base image.
  frame1.save(f'{dir_path}/{gif_name}.gif', format='GIF', append_images=frames[1:], save_all=True, duration=ms, loop=0)
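
# A usage sketch (the maps sub-directory is hypothetical):
#   make_gif(f'{paths.mod}/rf/maps', ext='png', gif_name='monthly_maps', ms=500)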

# Ancillary functions.

def format_date(date):
  '''Turn a date into dd/mm/yyyy format.
  date: string of format yyyymmdd.
  '''
  return(f'{date[6:]}/{date[4:6]}/{date[0:4]}')
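
# For example, format_date('20150115') returns '15/01/2015'.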

def mem():
  '''Print the current memory usage.'''
  GB = 1000000000
  mem_used = psutil.virtual_memory().used / GB
  print(f'Current memory usage: {round(mem_used, 3)} GB.')