-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlr_tts.py
109 lines (89 loc) · 3.12 KB
/
lr_tts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
'''
Name: Sophie Turner.
Date: 11/8/2024.
Contact: st838@cam.ac.uk
Try to predict UKCA J rates with Lasso regression using UKCA data as inputs.
For use on Cambridge chemistry department's atmospheric servers.
Files are located at scratch/$USER/netscratch_all/st838.
'''
import glob
import numpy as np
import file_paths as paths
import matplotlib.pyplot as plt
from sklearn import linear_model
import prediction_fns_numpy as fns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
data_file = f'{paths.npy}/4days.npy'
#data_file = f'{paths.npy}/20150715.npy'
# Indices of some common combinations to use as inputs and outputs.
phys_all = np.arange(15, dtype=int)
# Best physics inputs from feature selection.
phys_best = [1,7,8,9,10,14]
NO2 = 16
HCHOr = 18 # Radical product.
HCHOm = 19 # Molecular product.
NO3 = 25
H2SO4 = 29
HOCl = 71
H2O2 = 74
O3 = 78 # O(1D) product.
# J rates which are not summed or duplicate fg.
J_core = [16,18,19,20,24,25,28,29,30,31,32,33,34,36,51,52,53,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82]
# The above with usually zero rates removed.
J_core = [16,18,19,20,24,25,28,30,31,32,33,51,52,66,68,70,71,72,73,74,75,76,78,79,80,81,82]
# Some favourites for testing.
J_test = [NO2,HCHOm,H2O2,O3]
J_varied = [NO2,HCHOm,NO3,H2SO4,HOCl,H2O2,O3]
data = np.load(data_file)
# Split the dataset by Fast-J cutoff pressure.
_, data = fns.split_pressure(data)
# Cut the higher altitudes off where Fast-JX might not work properly.
# 25km / 85km = 29%
#top = 0.29642513
#data = data[:, np.where(data[1] <= top)].squeeze()
# Pick out one time step.
#data = data[:, np.where(data[0] == 12.)].squeeze()
# Pick out one tropospheric level.
# About the top of Mt Everest.
# 9km / 85km ~= 10.6%
#trop = 0.10369537
#data = data[:, np.where(data[1] == trop)].squeeze()
# Pick out one column.
#data = data[:, np.where(data[2] == 51.875)].squeeze() # lat.
#data = data[:, np.where(data[3] == 359.0625)].squeeze() # lon.
# Input data.
inputs = data[phys_best]
inputs = np.swapaxes(inputs, 0, 1)
print('\nInputs:', inputs.shape)
# Test all the targets.
for target_idx in [HCHOm]:
target = data[target_idx]
print('\nTarget:', target_idx, target.shape)
in_train, in_test, out_train, out_test = train_test_split(inputs, target, test_size=0.1, random_state=6, shuffle=False)
# Standardisation (optional).
scaler = StandardScaler()
in_train = scaler.fit_transform(in_train)
in_test = scaler.fit_transform(in_test)
# Find suitable size of Lasso regularisation const.
avg = np.mean(out_train)
e = np.floor(np.log10(avg)) - 1
reg = 10**e
# Train regression model.
#model = linear_model.Lasso(reg)
model = linear_model.LinearRegression()
print(model)
model.fit(in_train, out_train)
# Test.
pred, mse, mape, r2 = fns.test(model, in_test, out_test)
# Make them the right shape.
pred = pred.squeeze()
out_test = out_test.squeeze()
# Plotting this many datapoints is excessive and costly. Reduce it to 10%.
length = len(pred)
idxs = np.arange(0, length, 10)
pred = pred[idxs]
out_test = out_test[idxs]
del(idxs)
# Plot.
fns.show(out_test, pred, mse, r2)