Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 6 additions & 82 deletions configs/config_calculator.yml
Original file line number Diff line number Diff line change
@@ -1,26 +1,9 @@
trainer: matdeeplearn.trainers.PropertyTrainer

task:
run_mode: train
identifier: my_train_job
parallel: False
# If seed is not set, then it will be random every time
seed: 12345678
# Defaults to run directory if not specified
save_dir:
# continue from a previous job
continue_job: False
# specify if the training state is loaded: epochs, learning rate, etc
load_training_state: False
# Path to the checkpoint.pt file. The model used in the calculator will load parameters from this file.
checkpoint_path: results/2023-09-20-16-22-38-738-my_train_job/checkpoint/best_checkpoint.pt
# E.g. ["train", "val", "test"]
write_output: [train, val, test]
# Specify if labels are provided for the predict task
# labels: True
use_amp: True
checkpoint_path: ./checkpoints/cgcnn_checkpoint.pt

model:
# Model used by the calculator
name: CGCNN
# model attributes
dim1: 100
Expand All @@ -39,62 +22,12 @@ model:
# Compute edge attributes on the fly in the model forward
otf_edge_attr: True
# Compute node attributes on the fly in the model forward
otf_node_attr: True
otf_node_attr: False
model_ensemble: 1
# compute gradients w.r.t to positions and cell, requires otf_edge_attr=True
gradient: True

optim:
max_epochs: 40
max_checkpoint_epochs: 0
lr: 0.002
# Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper
loss:
loss_type: TorchLossWrapper
loss_args: {loss_fn: l1_loss}
# gradient clipping value
clip_grad_norm: 10
batch_size: 100
optimizer:
optimizer_type: AdamW
optimizer_args: {}
scheduler:
scheduler_type: ReduceLROnPlateau
scheduler_args: {mode: min, factor: 0.8, patience: 10, min_lr: 0.00001, threshold: 0.0002}
#Training print out frequency (print per n number of epochs)
verbosity: 5
# tqdm progress bar per batch in the epoch
batch_tqdm: False

dataset:
name: test_data
# Whether the data has already been processed and a data.pt file is present from a previous run
processed: False
# Path to data files - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path, predict: predict_path}
src: data/force_data/data.json
# Path to target file within data_path - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path} or left blank when the dataset is a single json file
# Example: target_path: "data/raw_graph_scalar/targets.csv"
target_path:
# Path to save processed data.pt file
pt_path: data/force_data/
# Either "node" or "graph" level
prediction_level: graph

transforms:
- name: GetY
args:
# index specifies the index of a target vector to predict, which is useful when there are multiple property labels for a single dataset
# For example, an index: 0 (default) will use the first entry in the target vector
# if all values are to be predicted simultaneously, then specify index: -1
index: -1
otf_transform: True # Optional parameter, default is True
# Format of data files (limit to those supported by ASE: https://wiki.fysik.dtu.dk/ase/ase/io/io.html)
data_format: json
# specify if additional attributes to be loaded into the dataset from the .json file; e.g. additional_attributes: [forces, stress]
additional_attributes:
# Print out processing info
verbose: True
# Index of target column in targets.csv
# graph specific settings
dataset:
preprocess_params:
# one of mdl (minimum image convention), ocp (all neighbors included)
edge_calc_method: ocp
Expand All @@ -118,13 +51,4 @@ dataset:
self_loop: True
# Method of obtaining atom dictionary: available: (onehot)
node_representation: onehot
all_neighbors: True

# Number of workers for dataloader, see https://pytorch.org/docs/stable/data.html
num_workers: 0
# Where the dataset is loaded; either "cpu" or "cuda"
dataset_device: cpu
# Ratios for train/val/test split out of a total of less than 1 (0.8 corresponds to 80% of the data)
train_ratio: 0.9
val_ratio: 0.05
test_ratio: 0.05
all_neighbors: True
130 changes: 96 additions & 34 deletions matdeeplearn/common/ase_utils.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,75 @@
import torch
from typing import List
import logging

import numpy as np
import torch
import yaml
from ase import Atoms
from ase.geometry import Cell
from ase.calculators.calculator import Calculator
from matdeeplearn.preprocessor.helpers import generate_node_features
from torch_geometric.data.data import Data
from torch_geometric.loader import DataLoader
import logging
from typing import List

from matdeeplearn.common.registry import registry
from matdeeplearn.models.base_model import BaseModel
from matdeeplearn.preprocessor.helpers import generate_node_features


logging.basicConfig(level=logging.INFO)


class MDLCalculator(Calculator):
"""
A neural networked based Calculator that calculates the energy, forces and stress of a crystal structure.
"""
implemented_properties = ["energy", "forces", "stress"]

def __init__(self, config):
def __init__(self, config, rank='cuda:0'):
"""
Initialize the MDLCalculator instance.

Args:
config (str or dict): Configuration settings for the MDLCalculator.
- config (str or dict): Configuration settings for the MDLCalculator.
- rank (str): Rank of device the calculator calculates properties. Defaults to 'cuda:0'

Raises:
AssertionError: If the trainer name is not in the correct format or if the trainer class is not found.
- AssertionError: If the trainer name is not in the correct format or if the trainer class is not found.
"""
Calculator.__init__(self)

if isinstance(config, str):
logging.info(f'MDLCalculator instantiated from config: {config}')
with open(config, "r") as yaml_file:
config = yaml.safe_load(yaml_file)
elif isinstance(config, dict):
logging.info('MDLCalculator instantiated from a dictionary.')
else:
raise NotImplementedError('Unsupported config type.')

gradient = config["model"].get("gradient", False)
otf_edge_index = config["model"].get("otf_edge_index", False)
otf_edge_attr = config["model"].get("otf_edge_attr", False)
self.otf_node_attr = config["model"].get("otf_node_attr", False)
assert otf_edge_index and otf_edge_attr and gradient, "To use this calculator to calculate forces and stress, you should set otf_edge_index, oft_edge_attr and gradient to True."

trainer_name = config.get("trainer", "matdeeplearn.trainers.PropertyTrainer")
assert trainer_name.count(".") >= 1, "Trainer name should be in format {module}.{trainer_name}, like matdeeplearn.trainers.PropertyTrainer"

trainer_cls = registry.get_trainer_class(trainer_name)
load_state = config['task'].get('checkpoint_path', None)
assert trainer_cls is not None, "Trainer not found"
self.trainer = trainer_cls.from_config(config)

try:
self.trainer.load_checkpoint()
except ValueError:
logging.warning("No checkpoint.pt file is found, and an untrained model is used for prediction.")

self.device = rank if torch.cuda.is_available() else 'cpu'
self.models = MDLCalculator._load_model(config, self.device)
self.n_neighbors = config['dataset']['preprocess_params'].get('n_neighbors', 250)
self.device = 'cpu'

def calculate(self, atoms: Atoms, properties=implemented_properties, system_changes=None):
def calculate(self, atoms: Atoms, properties=implemented_properties, system_changes=None) -> None:
"""
Calculate energy, forces, and stress for a given ase.Atoms object.

Args:
atoms (ase.Atoms): The atomic structure for which calculations are to be performed.
properties (list): List of properties to calculate. Defaults to ['energy', 'forces', 'stress'].
system_changes: Not supported in the current implementation.
- atoms (ase.Atoms): The atomic structure for which calculations are to be performed.
- properties (list): List of properties to calculate. Defaults to ['energy', 'forces', 'stress'].
- system_changes: Not supported in the current implementation.

Returns:
None: The results are stored in the instance variable 'self.results'.
- None: The results are stored in the instance variable 'self.results'.

Note:
This method performs energy, forces, and stress calculations using a neural network-based calculator.
- This method performs energy, forces, and stress calculations using a neural network-based calculator.
The results are stored in the instance variable 'self.results' as 'energy', 'forces', and 'stress'.
"""
Calculator.calculate(self, atoms, properties, system_changes)
Expand All @@ -87,11 +88,20 @@ def calculate(self, atoms: Atoms, properties=implemented_properties, system_chan

data_list = [data]
loader = DataLoader(data_list, batch_size=1)
loader_iter = iter(loader)
batch = next(loader_iter).to(self.device)

out_list = []
for model in self.models:
out_list.append(model(batch))

out = self.trainer.predict_by_calculator(loader)
self.results['energy'] = out['energy']
self.results['forces'] = out['forces']
self.results['stress'] = out['stress']
energy = torch.stack([entry["output"] for entry in out_list]).mean(dim=0)
forces = torch.stack([entry["pos_grad"] for entry in out_list]).mean(dim=0)
stresses = torch.stack([entry["cell_grad"] for entry in out_list]).mean(dim=0)

self.results['energy'] = energy.detach().cpu().numpy()
self.results['forces'] = forces.detach().cpu().numpy()
self.results['stress'] = stresses.squeeze().detach().cpu().numpy()

@staticmethod
def data_to_atoms_list(data: Data) -> List[Atoms]:
Expand All @@ -101,11 +111,11 @@ def data_to_atoms_list(data: Data) -> List[Atoms]:
with its associated properties such as positions and cell.

Args:
data (Data): A data object containing information about atomic structures.
- data (Data): A data object containing information about atomic structures.

Returns:
List[Atoms]: A list of 'ase.Atoms' objects, each representing an atomic structure
with positions and associated properties.
- List[Atoms]: A list of 'ase.Atoms' objects, each representing an atomic structure
with positions and associated properties.
"""
cells = data.cell.numpy()

Expand All @@ -120,3 +130,55 @@ def data_to_atoms_list(data: Data) -> List[Atoms]:
for i in range(len(data.structure_id)):
atoms_list[i].structure_id = data.structure_id[i][0]
return atoms_list

@staticmethod
def _load_model(config: dict, rank: str) -> List[BaseModel]:
"""
This static method loads a model based on the provided configuration.

Parameters:
- config (dict): Configuration dictionary containing model and dataset parameters.
- rank: Rank information for distributed training.

Returns:
- model_list: A list of loaded models.
"""

graph_config = config['dataset']['preprocess_params']
model_config = config['model']

model_list = []
model_name = 'matdeeplearn.models.' + model_config["name"]
logging.info(f'MDLCalculator: setting up {model_name} for calculation')
# Obtain node, edge, and output dimensions for model initialization
for _ in range(model_config["model_ensemble"]):
node_dim = graph_config["node_dim"]
edge_dim = graph_config["edge_dim"]

model_cls = registry.get_model_class(model_name)
model = model_cls(
node_dim=node_dim,
edge_dim=edge_dim,
output_dim=1,
cutoff_radius=graph_config["cutoff_radius"],
n_neighbors=graph_config["n_neighbors"],
graph_method=graph_config["edge_calc_method"],
num_offsets=graph_config["num_offsets"],
**model_config
)
model = model.to(rank)
model_list.append(model)

checkpoints = config['task']["checkpoint_path"].split(',')
if len(checkpoints) == 0:
logging.warning("MDLCalculator: No checkpoint.pt file is found, and untrained models are used for prediction.")
else:
for i in range(len(checkpoints)):
try:
checkpoint = torch.load(checkpoints[i])
model_list[i].load_state_dict(checkpoint["state_dict"])
logging.info(f'MDLCalculator: weights for model No.{i+1} loaded from {checkpoints[i]}')
except ValueError:
logging.warning(f"MDLCalculator: No checkpoint.pt file is found for model No.{i+1}, and an untrained model is used for prediction.")

return model_list