Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
__pycache__/
*.py[cod]
*$py.class
core.python.*

# C extensions
*.so
Expand Down Expand Up @@ -163,18 +164,23 @@ dmypy.json
# mac
.DS_Store

# data
data/**

# config
./config/*

# results
results/**
**/*/results/

server/

main.py

# tests
testing/*
test*.py
test*.ipynb

checkpoints/

# misc
.flake8
.pylintrc
**/wandb/
*.out
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
```
conda-merge env.common.yaml env.gpu.yaml > env.yaml
conda env create -f env.yaml
conda activate matdeeplearn
```

2. CPU-only machines:

1. M1 Macs (see https://github.com/pyg-team/pytorch_geometric/issues/4549):
Expand All @@ -27,6 +28,7 @@
```
conda-merge env.common.yaml env.cpu.yaml > env.yaml
conda env create -f env.yaml
conda activate matdeeplearn
```

3. Install package:
Expand Down
50 changes: 22 additions & 28 deletions configs/config.yml
Original file line number Diff line number Diff line change
@@ -1,33 +1,28 @@

trainer: property

task:
# run_mode: train
identifier: "my_train_job"

reprocess: False


parallel: True
# seed=0 means random initialization
seed: 0
#seed=0 means random initialization


# Defaults to run directory if not specified
# save_dir: "."
# checkpoint_dir: "."
write_output: True
parallel: True
#Training print out frequency (print per n number of epochs)
verbosity: 5



model:
name: CGCNN
load_model: False
save_model: True
model_path: "my_model.pth"
edge_steps: 50
self_loop: True
#model attributes
# model attributes
dim1: 100
dim2: 150
pre_fc_count: 1
Expand All @@ -42,8 +37,9 @@ model:

optim:
max_epochs: 250
max_checkpoint_epochs: 0
lr: 0.002
#Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper
# Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper
loss:
loss_type: "TorchLossWrapper"
loss_args: {"loss_fn": "l1_loss"}
Expand All @@ -57,33 +53,31 @@ optim:
scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}

dataset:
processed: False # if False, need to preprocess the data and generate .pt file
# Whether to use "inmemory" or "large" format for pytorch-geometric dataset. Recommend inmemory unless the dataset is too large
# dataset_type: "inmemory"
#Path to data files
processed: False
# Path to data files
src: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_npj/raw/"
#Path to target file within data_path
# Path to target file within data_path
target_path: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_npj/targets.csv"
#Path to save processed data.pt file
# Path to save processed data.pt file
pt_path: "/global/homes/s/shuyijia/datasets/MP_data_npj/"
#Format of data files (limit to those supported by ASE)
transforms:
- name: GetY
args:
index: 0
otf: False # Optional parameter, default is False
# Format of data files (limit to those supported by ASE)
data_format: "json"
#Method of obtaining atom dictionary: available:(onehot)
# Method of obtaining atom dictionary: available:(onehot)
node_representation: "onehot"
additional_attributes: []
#Print out processing info
# Print out processing info
verbose: True

#Loading dataset params
#Index of target column in targets.csv
target_index: 0

#graph specific settings
# Index of target column in targets.csv
# graph specific settings
cutoff_radius : 8.0
n_neighbors : 12
edge_steps : 50

#Ratios for train/val/test split out of a total of 1
# Ratios for train/val/test split out of a total of 1
train_ratio: 0.8
val_ratio: 0.05
test_ratio: 0.15
92 changes: 92 additions & 0 deletions configs/examples/config_alignn.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
trainer: property

task:
  identifier: "alignn_train_100"
  reprocess: False
  # NOTE(review): "parallel" appeared twice in this mapping with the same
  # value; duplicate keys are invalid YAML (most parsers silently keep the
  # last one), so the redundant occurrence was removed.
  parallel: True
  # seed=0 means random initialization
  seed: 0
  save_dir: "."
  checkpoint_dir: "."
  write_output: True
  # Training print out frequency (print per n number of epochs)
  verbosity: 1


model:
  name: ALIGNN
  load_model: False
  save_model: True
  model_path: "alignn_model.pth"
  alignn_layers: 4
  gcn_layers: 4
  atom_input_features: 114
  edge_input_features: 50
  triplet_input_features: 40
  embedding_features: 64
  hidden_features: 256
  output_features: 1
  min_edge_distance: 0.0
  max_edge_distance: 8.0
  link: "identity"

optim:
  max_epochs: 100
  lr: 0.001
  # Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper
  loss:
    loss_type: "TorchLossWrapper"
    loss_args: {"loss_fn": "mse_loss"}

  batch_size: 64

  optimizer:
    optimizer_type: "AdamW"
    optimizer_args: {"weight_decay": 0.00001}
  scheduler:
    scheduler_type: "OneCycleLR"
    # Look further into steps per epoch, for now hardcoded calculation from paper
    scheduler_args: {"max_lr": 0.001, "epochs": 300, "steps_per_epoch": 1}

dataset:
  processed: False
  # Path to data files
  # src: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_69K/raw/"
  src: "/storage/home/hhive1/sbaskaran31/scratch/MP_data_69K/raw/"
  # Path to target file within data_path
  # target_path: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_69K/targets.csv"
  target_path: "/storage/home/hhive1/sbaskaran31/scratch/MP_data_69K/targets.csv"
  # Path to save processed data.pt file (a directory path not filepath)
  # pt_path: "/global/cfs/projectdirs/m3641/Sidharth/datasets/MP_data_69K/"
  pt_path: "/storage/home/hhive1/sbaskaran31/scratch/MP_data_69K/"
  transforms:
    - name: GetY
      args:
        index: 0
      otf: False
    - name: NumNodeTransform
      args:
      otf: False
    - name: LineGraphMod
      args:
      otf: False
    - name: ToFloat
      args:
      otf: False
  # Format of data files (limit to those supported by ASE)
  data_format: "json"
  # Method of obtaining atom dictionary; available: (onehot)
  node_representation: "onehot"
  additional_attributes: []
  # Print out processing info
  verbose: True
  # Loading dataset params
  # Index of target column in targets.csv
  # graph specific settings
  cutoff_radius: 8.0
  n_neighbors: 12
  edge_steps: 50
  # Ratios for train/val/test split out of a total of 1
  train_ratio: 0.8
  val_ratio: 0.05
  test_ratio: 0.15
98 changes: 98 additions & 0 deletions configs/examples/config_graphite.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@

trainer: property

task:
# run_mode: train
identifier: "alignn_train_100"

reprocess: False


parallel: True
seed: 0
#seed=0 means random initalization


write_output: True
parallel: True
#Training print out frequency (print per n number of epochs)
verbosity: 1



model:
name: ALIGNN_GRAPHITE
load_model: False
save_model: True
model_path: "alignn_graphite_model.pth"
num_interactions: 4
num_species: 3
cutoff: 3.0
dim: 64
# min_angle: float = 0.0,
# max_angle: float = torch.acos(torch.zeros(1)).item() * 2,
link: "identity"

optim:
max_epochs: 103
lr: 0.001
#Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper
loss:
loss_type: "TorchLossWrapper"
loss_args: {"loss_fn": "mse_loss"}

batch_size: 64

optimizer:
optimizer_type: "AdamW"
optimizer_args: {"weight_decay": 0.00001}
scheduler:
scheduler_type: "OneCycleLR"
# Look further into steps per epoch, for now hardcoded calculation from paper
scheduler_args: {"max_lr": 0.001, "epochs": 300, "steps_per_epoch": 1}

dataset:
processed: True # if False, need to preprocessor data and generate .pt file
# Whether to use "inmemory" or "large" format for pytorch-geometric dataset. Reccomend inmemory unless the dataset is too large
# dataset_type: "inmemory"
#Path to data files
src: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_69K/raw/"
#Path to target file within data_path
target_path: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_69K/targets.csv"
#Path to save processed data.pt file (a directory path not filepath)
pt_path: "/global/cfs/projectdirs/m3641/Sidharth/datasets/MP_data_69K/"
transforms:
- name: GetY
args:
index: 0
otf: False
- name: NumNodeTransform
args:
otf: False
- name: LineGraphMod
args:
otf: False
- name: ToFloat
args:
otf: False
#Format of data files (limit to those supported by ASE)
data_format: "json"
#Method of obtaining atom idctionary: available:(onehot)
node_representation: "onehot"
additional_attributes: []
#Print out processing info
verbose: True

#Loading dataset params
#Index of target column in targets.csv
target_index: 0

#graph specific settings
cutoff_radius : 8.0
n_neighbors : 12
edge_steps : 50

#Ratios for train/val/test split out of a total of 1
train_ratio: 0.8
val_ratio: 0.05
test_ratio: 0.15
2 changes: 1 addition & 1 deletion env.common.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ dependencies:
- pre-commit
- numpy
- scipy
- ase=3.21.*
- ase==3.21.*
- black
- pandas
6 changes: 3 additions & 3 deletions matdeeplearn/common/config/build_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def create_dict_from_args(args: list, sep: str = "."):

def build_config(args, args_override):
# Open provided config file
assert os.path.exists(args.config_path), (
"Config file not found in " + args.config_path
)
assert os.path.exists(
args.config_path
), f"Config file not found in {str(args.config_path)}"
with open(args.config_path, "r") as ymlfile:
config = yaml.load(ymlfile, Loader=yaml.FullLoader)

Expand Down
Loading