From e7e3b8cb8940607cfc2639e6dfb0d925dcdf855d Mon Sep 17 00:00:00 2001 From: Weixiang Yu Date: Tue, 12 Dec 2023 21:44:12 +0000 Subject: [PATCH] add seed option to lc sim functions --- src/eztao/ts/carma_sim.py | 68 +++++++++++++++++++++++++++++++++------ src/eztao/ts/utils.py | 11 +++++-- tests/test_CARMAFit.py | 16 ++++++--- tests/test_CARMASim.py | 62 ++++++++++++++++++++++++++++++++++- 4 files changed, 140 insertions(+), 17 deletions(-) diff --git a/src/eztao/ts/carma_sim.py b/src/eztao/ts/carma_sim.py index ff22791..d2ecf6d 100644 --- a/src/eztao/ts/carma_sim.py +++ b/src/eztao/ts/carma_sim.py @@ -12,7 +12,27 @@ __all__ = ["gpSimFull", "gpSimRand", "gpSimByTime", "addNoise", "pred_lc"] -def gpSimFull(carmaTerm, SNR, duration, N, nLC=1, log_flux=True): +def gp_sample(gp, size=None, seed=None): + """Sample Celerite GP with a fixed seed""" + + if seed is not None: + rng = np.random.default_rng(seed=seed) + else: + rng = np.random.default_rng() + + gp._recompute() + if size is None: + n = rng.standard_normal(len(gp._t)) + else: + n = rng.standard_normal((len(gp._t), size)) + + n = gp.solver.dot_L(n) + if size is None: + return gp.mean.get_value(gp._t) + n[:, 0] + return gp.mean.get_value(gp._t)[None, :] + n.T + + +def gpSimFull(carmaTerm, SNR, duration, N, nLC=1, log_flux=True, lc_seed=None): """ Simulate CARMA time series using uniform sampling. @@ -25,6 +45,7 @@ def gpSimFull(carmaTerm, SNR, duration, N, nLC=1, log_flux=True): nLC (int, optional): Number of time series to simulate. Defaults to 1. log_flux (bool): Whether the flux/y values are in astronomical magnitude. This argument affects how errors are assigned. Defaults to True. + lc_seed (int): Random seed for time series simulation. Defaults to None. Raises: RuntimeError: If the input CARMA term/model is not stable, thus cannot be @@ -44,9 +65,14 @@ def gpSimFull(carmaTerm, SNR, duration, N, nLC=1, log_flux=True): "The covariance matrix of the provided CARMA term is not positive definite!" ) + if lc_seed is not None: + rng = np.random.default_rng(seed=lc_seed) + else: + rng = np.random.default_rng() + t = np.linspace(0, duration, N) noise = carmaTerm.get_rms_amp() / SNR - yerr = np.random.lognormal(0, 0.25, N) * noise + yerr = rng.lognormal(0, 0.25, N) * noise yerr = yerr[np.argsort(np.abs(yerr))] # small->large # init GP and solve matrix @@ -55,7 +81,7 @@ def gpSimFull(carmaTerm, SNR, duration, N, nLC=1, log_flux=True): # simulate, assign yerr based on y t = np.repeat(t[None, :], nLC, axis=0) - y = gp_sim.sample(size=nLC) + y = gp_sample(gp_sim, size=nLC, seed=lc_seed) # format yerr making it heteroscedastic yerr = np.repeat(yerr[None, :], nLC, axis=0) @@ -77,7 +103,16 @@ def gpSimFull(carmaTerm, SNR, duration, N, nLC=1, log_flux=True): def gpSimRand( - carmaTerm, SNR, duration, N, nLC=1, log_flux=True, season=True, full_N=10_000 + carmaTerm, + SNR, + duration, + N, + nLC=1, + log_flux=True, + season=True, + full_N=10_000, + lc_seed=None, + downsample_seed=None, ): """ Simulate CARMA time series randomly downsampled from a much denser full time series. @@ -95,12 +130,17 @@ def gpSimRand( to True. full_N (int, optional): The number of data points in the full time series (before downsampling). Defaults to 10_000. + lc_seed (int): Random seed for full time series simulation. Defaults to None. + downsample_seed (int): Random seed for downsampling the simulated full time + series. Defaults to None. Returns: (array(float), array(float), array(float)): Time stamps (default in day), y values and measurement errors of the simulated time series. """ - t, y, yerr = gpSimFull(carmaTerm, SNR, duration, full_N, nLC=nLC, log_flux=log_flux) + t, y, yerr = gpSimFull( + carmaTerm, SNR, duration, full_N, nLC=nLC, log_flux=log_flux, lc_seed=lc_seed + ) t = np.atleast_2d(t) y = np.atleast_2d(y) yerr = np.atleast_2d(yerr) @@ -116,7 +156,7 @@ def gpSimRand( mask1 = add_season(t[i]) else: mask1 = np.ones(t[i].shape[0], dtype=bool) - mask2 = downsample_byN(t[i, mask1], N) + mask2 = downsample_byN(t[i, mask1], N, seed=downsample_seed) tOut[i, :] = t[i, mask1][mask2] yOut[i, :] = y[i, mask1][mask2] yerrOut[i, :] = yerr[i, mask1][mask2] @@ -127,7 +167,7 @@ def gpSimRand( return tOut, yOut, yerrOut -def gpSimByTime(carmaTerm, SNR, t, factor=10, nLC=1, log_flux=True): +def gpSimByTime(carmaTerm, SNR, t, factor=10, nLC=1, log_flux=True, lc_seed=None): """ Simulate CARMA time series at desired time stamps. @@ -147,6 +187,7 @@ def gpSimByTime(carmaTerm, SNR, t, factor=10, nLC=1, log_flux=True): nLC (int, optional): Number of time series to simulate. Defaults to 1. log_flux (bool): Whether the flux/y values are in astronomical magnitude. This argument affects how errors are assigned. Defaults to True. + lc_seed (int): Random seed for time series simulation. Defaults to None. Returns: (array(float), array(float), array(float)): Time stamps (default in day), y @@ -158,7 +199,7 @@ def gpSimByTime(carmaTerm, SNR, t, factor=10, nLC=1, log_flux=True): # simulate full LC tFull, yFull, yerrFull = gpSimFull( - carmaTerm, SNR, duration, N=N, nLC=nLC, log_flux=log_flux + carmaTerm, SNR, duration, N=N, nLC=nLC, log_flux=log_flux, lc_seed=lc_seed ) tFull = np.atleast_2d(tFull) yFull = np.atleast_2d(yFull) @@ -177,7 +218,7 @@ def gpSimByTime(carmaTerm, SNR, t, factor=10, nLC=1, log_flux=True): return tOut, yOut, yerrOut -def addNoise(y, yerr): +def addNoise(y, yerr, seed=None): """ Add (gaussian) noise to the input simulated time series given the measurement uncertainties. @@ -185,12 +226,19 @@ def addNoise(y, yerr): y (array(float)): The 'clean' time series. yerr (array(float)): The measurement uncertainties for the input time series. + seed (int): Random seed for simulating noise. Defaults to None. Returns: array(float): A new time series with simulated (gaussian ) noise added on top. """ - vec_norm = np.vectorize(np.random.normal, signature="(n),(n)->(n)") + + if seed is not None: + rng = np.random.default_rng(seed=seed) + else: + rng = np.random.default_rng() + + vec_norm = np.vectorize(rng.normal, signature="(n),(n)->(n)") noise = vec_norm(np.zeros_like(y), yerr) return y + noise diff --git a/src/eztao/ts/utils.py b/src/eztao/ts/utils.py index f5d6c91..3e231fc 100644 --- a/src/eztao/ts/utils.py +++ b/src/eztao/ts/utils.py @@ -9,21 +9,28 @@ __all__ = ["downsample_byN", "add_season", "downsample_byTime", "median_clip"] -def downsample_byN(t, nObs): +def downsample_byN(t, nObs, seed): """ Randomly choose N data points from a given time series. Args: t (array(float)): Time stamps of the original time series. N (int): The number of entries in the final time series. + seed (int): Random seed for downsampling. Defaults to None. Returns: A 1d array booleans indicating which data points to keep. """ + + if seed is not None: + rng = np.random.default_rng(seed=seed) + else: + rng = np.random.default_rng() + # random choose index idx = np.arange(len(t)) mask = np.zeros_like(idx, dtype=np.bool_) - true_idx = np.random.choice(idx, nObs, replace=False) + true_idx = rng.choice(idx, nObs, replace=False) # assign chosen index to 1/True mask[true_idx] = 1 diff --git a/tests/test_CARMAFit.py b/tests/test_CARMAFit.py index 63609e2..6c9ff16 100644 --- a/tests/test_CARMAFit.py +++ b/tests/test_CARMAFit.py @@ -141,7 +141,9 @@ def test_drwFit(): def test_dhoFit(): - t1, y1, yerr1 = gpSimRand(dho1, 100, 365 * 10.0, 200, nLC=100, season=False) + t1, y1, yerr1 = gpSimRand( + dho1, 100, 365 * 10.0, 200, nLC=100, season=False, lc_seed=10 + ) best_fit_dho1 = np.array( Parallel(n_jobs=-1)( delayed(dho_fit)(t1[i], y1[i], yerr1[i]) for i in range(len(t1)) @@ -156,7 +158,9 @@ def test_dhoFit(): assert np.percentile(diff1, 75) < 0.25 # the second test will down scale lc by 1e6 - t2, y2, yerr2 = gpSimRand(dho2, 100, 365 * 10.0, 200, nLC=100, season=False) + t2, y2, yerr2 = gpSimRand( + dho2, 100, 365 * 10.0, 200, nLC=100, season=False, lc_seed=10 + ) best_fit_dho2 = np.array( Parallel(n_jobs=-1)( delayed(dho_fit)(t2[i], y2[i] / 1e6, yerr2[i] / 1e6) for i in range(len(t2)) @@ -172,7 +176,9 @@ def test_dhoFit(): def test_carmaFit(): - t1, y1, yerr1 = gpSimRand(carma30, 200, 365 * 10.0, 1000, nLC=150, season=False) + t1, y1, yerr1 = gpSimRand( + carma30, 200, 365 * 10.0, 1000, nLC=150, season=False, lc_seed=10 + ) best_fit_carma1 = np.array( Parallel(n_jobs=-1)( delayed(carma_fit)(t1[i], y1[i] / 1e-6, yerr1[i] / 1e-6, 3, 0, n_opt=30) @@ -189,7 +195,9 @@ def test_carmaFit(): assert np.percentile(diff1, 75) < 0.4 # the second test will down scale lc by 1e6 - t2, y2, yerr2 = gpSimRand(carma31, 300, 365 * 10.0, 1000, nLC=150, season=False) + t2, y2, yerr2 = gpSimRand( + carma31, 300, 365 * 10.0, 1000, nLC=150, season=False, lc_seed=10 + ) best_fit_carma2 = np.array( Parallel(n_jobs=-1)( delayed(carma_fit)(t2[i], y2[i], yerr2[i], 3, 1, n_opt=50) diff --git a/tests/test_CARMASim.py b/tests/test_CARMASim.py index 6fc9c6a..be6a814 100644 --- a/tests/test_CARMASim.py +++ b/tests/test_CARMASim.py @@ -26,9 +26,30 @@ def test_invalidSim(): t, y, yerr = gpSimFull(carma_invalid, 20, 365 * 10.0, 10000) +def test_simFullSeed(): + """Test the lc_seed option of gpSimFull.""" + + # SNR = 20 + nLC = 5 + for kernel in test_kernels: + t, y1, yerr1 = gpSimFull(kernel, 20, 365 * 10.0, 1000, nLC=nLC, lc_seed=10) + t, y2, yerr2 = gpSimFull(kernel, 20, 365 * 10.0, 1000, nLC=nLC, lc_seed=10) + + # agree between two separate calls + for i in range(nLC): + assert np.allclose(y1[i], y2[i]) + assert np.allclose(yerr1[i], yerr2[i]) + + # disagree between different simulations from one call + assert not np.allclose(y1[0], y1[nLC - 1]) + assert not np.allclose(y1[1], y1[nLC - 2]) + assert not np.allclose(yerr1[0], yerr1[nLC - 1]) + assert not np.allclose(yerr1[1], yerr1[nLC - 2]) + + def test_simRand(): """Test function gpSimRand.""" - # SNR = 10 + # SNR = 20 for kernel in test_kernels: t, y, yerr = gpSimRand(kernel, 20, 365 * 10.0, 150, nLC=100, season=False) log_amp = np.log(kernel.get_rms_amp()) @@ -53,6 +74,26 @@ def test_simRand(): assert (np.argsort(yF) == np.argsort(-yerrF)).all() +def test_simRandSeed(): + """Test the downsample_seed option of gpSimRand.""" + + # SNR = 20 + nLC = 1 + for kernel in test_kernels: + t1, y1, yerr1 = gpSimRand( + kernel, 20, 365 * 10.0, 150, nLC=nLC, lc_seed=10, downsample_seed=10 + ) + t2, y2, yerr2 = gpSimRand( + kernel, 20, 365 * 10.0, 150, nLC=nLC, lc_seed=10, downsample_seed=10 + ) + t3, y3, yerr3 = gpSimRand( + kernel, 20, 365 * 10.0, 150, nLC=nLC, lc_seed=10, downsample_seed=20 + ) + + assert np.allclose(t1, t2) + assert not np.allclose(t1, t3) + + def test_simByTime(): """Test function gpSimByTime.""" t = np.sort(np.random.uniform(0, 3650, 5000)) @@ -72,3 +113,22 @@ def test_simByTime(): # test single LC simulation tOut, yOut, yerrOut = gpSimByTime(dho2, SNR, t, nLC=1) assert tOut.shape[0] == yOut.shape[0] == yerrOut.shape[0] == t.shape[0] + + +def test_addNoiseSeed(): + """Test the seed option of addNoise.""" + + nLC = 1 + t1, y1, yerr1 = gpSimRand( + test_kernels[-1], 20, 365 * 10.0, 150, nLC=nLC, lc_seed=10, downsample_seed=10 + ) + t2, y2, yerr2 = gpSimRand( + test_kernels[-1], 20, 365 * 10.0, 150, nLC=nLC, lc_seed=10, downsample_seed=10 + ) + + noise_y1 = addNoise(y1, yerr1, seed=10) + noise_y2 = addNoise(y2, yerr2, seed=10) + noise_y3 = addNoise(y1, yerr1, seed=20) + + assert np.allclose(noise_y1, noise_y2) + assert not np.allclose(noise_y1, noise_y3)