Skip to content

Commit

Permalink
make committee learning robust towards datasets of varying number of …
Browse files Browse the repository at this point in the history
…atoms
  • Loading branch information
svandenhaute committed Dec 19, 2023
1 parent 4e67b85 commit 4561237
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 65 deletions.
4 changes: 2 additions & 2 deletions psiflow/committee.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ def _compute_disagreements(
disagreements = np.zeros(lengths[0])
if metric == "mean_force":
for i in range(lengths[0]):
forces = np.zeros((len(inputs), len(data[0][0]), 3))
forces = np.zeros((len(inputs), len(data[0][i]), 3))
for j in range(len(inputs)):
if data[j][i] == NullState:
assert j == 0 # nullstates do not depend on j
break
forces[j] = data[j][i].arrays["forces"]
forces[j, :] = data[j][i].arrays["forces"]
SE = (forces - np.mean(forces, axis=0, keepdims=True)) ** 2
RMSE = np.sqrt(np.mean(SE))
disagreements[i] = RMSE
Expand Down
9 changes: 6 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest
import yaml
from ase import Atoms
from ase.build import bulk
from ase.build import bulk, make_supercell
from ase.calculators.emt import EMT

import psiflow
Expand Down Expand Up @@ -94,8 +94,10 @@ def mace_config():
return asdict(mace_config)


def generate_emt_cu_data(nstates, amplitude):
atoms = bulk("Cu", "fcc", a=3.6, cubic=True)
def generate_emt_cu_data(nstates, amplitude, supercell=None):
if supercell is None:
supercell = np.eye(3)
atoms = make_supercell(bulk("Cu", "fcc", a=3.6, cubic=True), supercell)
atoms.calc = EMT()
pos = atoms.get_positions()
box = atoms.get_cell()
Expand All @@ -120,6 +122,7 @@ def generate_emt_cu_data(nstates, amplitude):
@pytest.fixture
def dataset(context):
data = generate_emt_cu_data(20, 0.2)
data += generate_emt_cu_data(5, 0.15, supercell=np.diag([1, 2, 1]))
data_ = [FlowAtoms.from_atoms(atoms) for atoms in data]
for atoms in data_:
atoms.reference_status = True
Expand Down
30 changes: 16 additions & 14 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,21 +49,21 @@ def test_dataset_empty(tmp_path):


def test_dataset_append(dataset):
assert 20 == dataset.length().result()
l = dataset.length().result() # noqa: E741
atoms_list = dataset.as_list().result()
assert len(atoms_list) == 20
assert len(atoms_list) == l
assert type(atoms_list) is list
assert type(atoms_list[0]) is FlowAtoms
empty = Dataset([]) # use [] instead of None
empty.append(dataset)
assert 20 == empty.length().result()
assert l == empty.length().result()
dataset.append(dataset)
assert 40 == dataset.length().result()
assert 2 * l == dataset.length().result()
added = dataset + dataset
assert added.length().result() == 80
assert dataset.length().result() == 40 # may not changed
assert added.length().result() == 4 * l
assert dataset.length().result() == 2 * l # must not have changed
dataset += dataset
assert dataset.length().result() == 80 # may not changed
assert dataset.length().result() == 4 * l # in-place += doubles the length again

# test canonical transformation
transformed = dataset.canonical_orientation()
Expand All @@ -90,10 +90,12 @@ def test_dataset_slice(dataset):
dataset_ = dataset.shuffle()
equal = np.array([False] * dataset.length().result())
for i in range(dataset.length().result()):
equal[i] = np.allclose(
dataset_[i].result().get_positions(),
dataset[i].result().get_positions(),
)
pos0 = dataset_[i].result().get_positions()
pos1 = dataset[i].result().get_positions()
if pos0.shape == pos1.shape:
equal[i] = np.allclose(pos0, pos1)
else:
equal[i] = False
assert not np.all(equal)


Expand All @@ -104,7 +106,7 @@ def test_dataset_from_xyz(tmp_path, dataset):
loaded = Dataset.load(path_xyz)
data = read(path_xyz, index=":")

for i in range(20):
for i in range(dataset.length().result()):
assert np.allclose(
dataset[i].result().get_positions(),
loaded[i].result().get_positions(),
Expand Down Expand Up @@ -266,9 +268,9 @@ def test_data_offset(dataset):
}
data = dataset.subtract_offset(**atomic_energies)
data_ = data.add_offset(**atomic_energies)
natoms = len(dataset[0].result())
offset = (natoms - 1) * atomic_energies["Cu"] + atomic_energies["H"]
for i in range(dataset.length().result()):
natoms = len(dataset[i].result())
offset = (natoms - 1) * atomic_energies["Cu"] + atomic_energies["H"]
assert np.allclose(
data[i].result().info["energy"],
dataset[i].result().info["energy"] - offset,
Expand Down
4 changes: 2 additions & 2 deletions tests/test_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ def test_sequential_learning(gpu, tmp_path, mace_config, dataset):
data = learning.run(model, reference, walkers, dataset)
psiflow.wait()
assert data.labeled().length().result() == len(walkers) + dataset.length().result()
assert learning.identifier.result() == 25
assert data.assign_identifiers().result() == 25
assert learning.identifier.result() == 5 + dataset.length().result()
assert data.assign_identifiers().result() == 5 + dataset.length().result()

data = learning.run(model, reference, walkers)
assert data.length().result() == 0 # iteration 0 already performed
Expand Down
47 changes: 3 additions & 44 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,47 +103,6 @@ def test_nequip_seed(nequip_config):
assert model.seed == 112


def test_nequip_offset(nequip_config, dataset):
config = NequIPConfig(**nequip_config)
model = NequIPModel(config)
model.initialize(dataset[:2])
with pytest.raises(AssertionError):
model.add_atomic_energy("H", 1) # cannot change this after initialization
assert not model.do_offset
errors = Dataset.get_errors(
dataset,
model.evaluate(dataset),
properties=["energy"],
)
assert np.mean(errors.result()) < 1e3 # in meV/atom

reference = EMTReference()
atomic_energies = {
"H": 3e2,
"Cu": reference.compute_atomic_energy("Cu", box_size=6), # future
}
errors_ = Dataset.get_errors(
dataset.subtract_offset(**atomic_energies),
model.evaluate(dataset),
)
assert np.mean(errors_.result()) > 1e3 # in meV/atom
model.reset()
for element, energy in atomic_energies.items():
model.add_atomic_energy(element, energy)
assert model.do_offset
model.initialize(dataset[:2])
errors_same = Dataset.get_errors(
dataset,
model.evaluate(dataset),
properties=["energy"],
)
assert np.allclose(
errors_same.result(),
errors.result(),
atol=1e-1,
)


def test_allegro_init(allegro_config, dataset):
model = AllegroModel(allegro_config)
model.seed = 1
Expand Down Expand Up @@ -325,7 +284,7 @@ def test_mace_seed(mace_config):
def test_mace_offset(mace_config, dataset, tmp_path):
config = MACEConfig(**mace_config)
model = MACEModel(config)
model.initialize(dataset[:2])
model.initialize(dataset)
with pytest.raises(AssertionError):
model.add_atomic_energy("H", 1) # cannot change this after initialization
assert not model.do_offset
Expand All @@ -350,7 +309,7 @@ def test_mace_offset(mace_config, dataset, tmp_path):
for element, energy in atomic_energies.items():
model.add_atomic_energy(element, energy)
assert model.do_offset
model.initialize(dataset[:2])
model.initialize(dataset)
errors_same = Dataset.get_errors(
dataset,
model.evaluate(dataset),
Expand All @@ -359,7 +318,7 @@ def test_mace_offset(mace_config, dataset, tmp_path):
assert np.allclose(
errors_same.result(),
errors.result(),
atol=1e-1,
atol=0.1,
)
model.save(tmp_path)
psiflow.wait()
Expand Down

0 comments on commit 4561237

Please sign in to comment.