-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmakeLifeSimpler.py
308 lines (231 loc) · 11.9 KB
/
makeLifeSimpler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
*Author:* Wilfried Mercier - IRAP
A set of useful functions to make life simpler when analysing data.
.. warning::
Some functions have not been updated or used in months and may not behave as would be expected.
"""
from numpy import ndarray
from typing import Callable, List, Union, Tuple, Dict
from scipy.stats import median_abs_deviation
import numpy as np
import numpy.random as rand
def inferError(function: Callable, parameters: List, err_params: List,
minBounds: List=None, maxBounds: List=None, num: int=1000, centralValue: Union[int, float]=None,
**kwargs) -> float:
r'''
Infer the 1 sigma error on the output of a function by estimating its standard deviation when perturbating its parameters according to their errors.
Errors are assumed to be Gaussian.
:param func function: function to infer the output error
:param list parameters: parameters to pass to the function. Order must be identical as in the function declaration.
:param list err_params: 1 sigma errors of the parameters. If the parameter is fixed or perfectly known, provide None instead to bypass the pertubation part.
:param list minBounds: (**Optional**) minimum limit allowed for the parameters when perturbating them. All perturbed parameters beyond these thresholds will be set to these values.
:param list maxBounds: (**Optional**) maximum limit allowed for the parameters when perturbating them. All perturbed parameters beyond these thresholds will be set to these values.
:param centralValue: (**Optional**) value used to compute the standard deviation (usually median or mean). If no value is provided, the median value of the perturbed iterations is used.
:type centralValue: int or float
:param int num: (**Optional**) number of iterations to perform
:param dict kwargs: additional parameters to pass onto the function at the end
:returns: 1 sigma error of the function around the median value
:rtype: float
:raises TypeError: if
* **num** is not an int
* **parameters** is not a list
* **err_params** is not a list
* **minBounds** is not a list
* **maxBounds** is not a list
* **centralValue** is neither int nor float
'''
if not isinstance(num, int):
raise TypeError('Number of iterations must be an integer.')
if not isinstance(parameters, list):
raise TypeError('Parameters must be given as a list.')
if not isinstance(err_params, list):
raise TypeError('Parameters errors must be given as a list.')
if minBounds is None:
minBounds = [None]*len(parameters)
if maxBounds is None:
maxBounds = [None]*len(parameters)
if not isinstance(minBounds, list):
raise TypeError('Minimum bounds should be given as a list.')
if not isinstance(maxBounds, list):
raise TypeError('Maximum bounds should be given as a list.')
# Generating the perturbations for each parameter
perturbations = []
for value, error, mini, maxi in zip(parameters, err_params, minBounds, maxBounds):
# Without error, we had the parameter value without perturbations
if error is None:
perturbations.append([value]*num)
else:
param = rand.normal(loc=value, scale=error, size=num)
# If there is a min or max bound, we clip outliers
if mini is not None:
param[param<mini] = mini
if maxi is not None:
param[param>maxi] = maxi
perturbations.append(param)
perturbations = transpose(perturbations)
# Apply the function to each element
result = np.asarray([function(*i, **kwargs) for i in perturbations])
if centralValue is None:
centralValue = np.nanmedian(result)
if not isinstance(centralValue, (int, float)):
raise TypeError('Central value should either be an int or a float.')
return median_abs_deviation(result-centralValue, axis=None, nan_policy='omit')
def transpose(lists: List[Union[List, Tuple]]) -> List[Union[List, Tuple]]:
r'''
Take the transpose of a list of lists/tuples.
All the inner lists are assumed to be of equal length, otherwise the transpose will not work correctly.
:param lists: list of all the lists or tuples to perform the transpose onto
:type lists: list[list] or lists[tuple]
:returns: the transposed list
:rtype: list
'''
if not isinstance(lists, list):
raise TypeError('lists parameter should be of type list only. Cheers !')
return list(zip(*lists))
def convertCoords(coordinates: Union[List, List[Dict]],
inSize: Tuple[int] = (200, 200), outSize: Tuple[int] = (31, 31),
conversionFactor: float = 1.0) -> List[Dict]:
r'''
Transforms the coordinates of a/many point(s) from one image to another
:param coordinates: coordinates of the points to convert from one image to another
:type coordinates: dict or list of dict
:param inSize: (**Optional**) size of the image the points are from
:type inSize: 2-tuple[int] or list[int]
:param outSize: (**Optional**) size of the image whereto we want to convert the positions of the points
:type outSize: 2-tuple[int] or list[int]
:param float conversionFactor: (**Optional**) numerical factor to convert the position from pixel to another relavant unit
:returns: a list of dictionnaries with transformed coordinates
:rtype: list[dict]
'''
try:
np.shape(coordinates)[0]
except:
coordinates = [coordinates]
for num, points in enumerate(coordinates):
for pos, key in enumerate(points.keys()):
coordinates[num][key] *= outSize[pos]/inSize[pos]*conversionFactor
return coordinates
def maskToRemoveVal(listOfArrays: List[ndarray],
val: Union[int, float] = None, keep: bool = True, astroTableMask: bool = False) -> ndarray:
r"""
Computes a mask by finding occurences in a list of arrays.
:param list[ndarray] listOfArrays: list of arrays from which the mask is built
:param val: (**Optional**) value to find. If None, it looks for nan values.
:type val: int or float or None
:param bool keep: (**Optional**) if True, it builds a mask with True everywhere the value val is encountered. If False, it does the opposite.
:param bool astroTableMask: (**Optional**) if True returns a mask from the astropy table column instead of looking for some value/nans with False values everywhere the data is masked.
:returns: a mask
:rtype: ndarray
:raises ValueError: if the arrays do not have the same shape
"""
shp = listOfArrays[0].shape
#Checking that arrays have the same shape
for array in listOfArrays[1:]:
if shp != array.shape:
raise ValueError("Arrays do not have the same dimensions, thus making the masking operation unfit.")
#Constructing first mask
if astroTableMask:
tmp = np.logical_not(listOfArrays[0].mask)
elif val is None:
tmp = np.logical_not(np.isnan(listOfArrays[0]))
else:
tmp = listOfArrays[0] == val
if not keep:
tmp = np.logical_not(tmp)
#Applying logical and on all the masks
for (num, array) in enumerate(listOfArrays[1:]):
#consider we are looking for nan in the arrays
if astroTableMask:
tmp = np.logical_and(tmp, np.logical_not(array.mask))
elif val is None:
tmp = np.logical_and(tmp, np.logical_not(np.isnan(array)))
else:
if keep:
tmp = np.logical_and(tmp, array==val)
else:
tmp = np.logical_and(tmp, array != val)
return tmp
def logicalAndFromList(lst: List[ndarray]) -> ndarray:
r"""
Compute the intersection of all the subarrays in the main array
:param lst: list of arrays containing True or False values
:type lst: list[ndarray]
:returns: np.logical_and applied on all the subarrays
:rtype: ndarray
"""
tmp = np.logical_and(lst[0], lst[1])
for i in range(2, len(lst)):
tmp = np.logical_and(tmp, lst[i])
return tmp
def applyMask(listOfArrays: List[ndarray], mask: ndarray) -> List[ndarray]:
r"""
Apply the same mask to a list of arrays and return the new arrays.
:param list[ndarray] listOfArrays: list of arrays the mask is applied to
:param ndarray mask: mask to apply
:returns: list of arrays with the mask applied. If len(listOfArrays) is 1, it returns only an array instead of a list of arrays with one object.
:rtype: list[ndarray] if len(listOfArrays) != 1 else ndarray
"""
for (num, array) in enumerate(listOfArrays):
if len(listOfArrays) == 1:
listOfArrays = array[mask]
else:
listOfArrays[num] = array[mask]
return listOfArrays
def findWhereIsValue(listOfArrays: List[ndarray], val: Union[int, float] = None) -> List[bool]:
"""
Find and print the first position where a value is found within a list of arrays.
:param list[ndarray] listOfArrays: list from which the value val is searched
:param val: (**Optional**) value to look for. If None, it looks for nan values.
:type val: int or float or None
:returns: list of booleans with the same length as listOfArrays, with True when the value was found in the array and False otherwise.
:rtype: list[bool]
"""
returnArr = []
for (num, array) in enumerate(listOfArrays):
if val is None:
if np.any(np.isnan(array)):
returnArr.append(True)
print("A nan was found at position", np.where(np.isnan(array))[0], "within array number", num)
else:
returnArr.append(False)
print("No nan was found in array number", num)
else:
if np.asarray(np.where(array==val)).shape[1] == 0:
returnArr.append(False)
print("No value", val, "found within array number", num)
else:
returnArr.append(True)
print("Value", val, "found at position", np.where((array==val))[0], "within array number", num)
return returnArr
def checkDupplicates(master: List[ndarray], names: List[str] = None) -> None:
"""
Check if galaxies are found multiple times in an array by looking for duplicates of (RA, DEC) pairs.
:param master: list of structured arrays to check
:type master: list of structured ndarrays (with 'RA' and 'DEC' fields)
:param list[str] names: (**Optional**) names of the arrays
:returns: None
"""
if (names is None) or (len(names) != len(master)):
try:
len(names) != len(master)
print("Given names were not enough. Using position in the list as name instead.")
except TypeError:
pass
names = np.char.array(['catalog nb ']*len(master)) + np.char.array(np.array(range(len(master)), dtype='str'))
for catalog, nameCat in zip(master, names):
cnt = True
for ra, dec, nb in zip(catalog['RA'], catalog['DEC'], range(catalog['RA'].shape[0])):
where1 = np.where(catalog['RA']==ra)[0]
where2 = np.where(catalog['DEC']==dec)[0]
if (len(where1)>1) and (len(where2)>1):
flag = True
for w in where2:
if flag and (w in where1):
print("RA =", ra, "deg and DEC =", dec, "deg galaxy (line " + str(nb) + ") is present more than once in catalog", nameCat)
flag = False
cnt = False
if cnt:
print("All the galaxies are only listed once in the catalog", nameCat)
return