diff --git a/.gitignore b/.gitignore index 4a166cd..e42d956 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *.err *.tmp *.xml +*.pyc diff --git a/orchard/gpaw_caller.py b/orchard/gpaw_caller.py index 3fdbdc1..d18e242 100644 --- a/orchard/gpaw_caller.py +++ b/orchard/gpaw_caller.py @@ -124,10 +124,10 @@ def get_nscf_energy_hybrid(atoms, settings, control): #atoms.calc.reset() #atoms.calc.initialize() e0 = atoms.get_potential_energy() - #atoms.calc.set(kpts=(8,8,8)) + #atoms.calc.new(kpts=(8,8,8)) settings['txt'] = settings.get('txt') or '-' #settings['verbose'] = settings.get('verbose') or 1 - atoms.calc.set(**settings) + atoms.calc.new(**settings) if control.get('parallel') is not None: atoms.calc.parallel.update(control['parallel']) e0t = atoms.get_potential_energy() diff --git a/orchard/gpaw_tasks.py b/orchard/gpaw_tasks.py index e2f292d..0130360 100644 --- a/orchard/gpaw_tasks.py +++ b/orchard/gpaw_tasks.py @@ -7,6 +7,7 @@ import time, yaml, os, subprocess, shutil, sys, shlex import ase.io from ase import Atoms +import psutil import copy @@ -41,6 +42,8 @@ def setup_gpaw_cmd(struct, settings_inp, nproc=None, cmd=None, update_only=False cmd = 'python {call_script} {settings_path}' else: cmd = 'mpirun -np {nproc} python {call_script} {settings_path}' + + print(f"Running GPAW with {nproc} processors") # Print the number of parallel processes used #logfile = settings_inp['calc'].get('txt') or DEFAULT_GPAW_CALC_SETTINGS.get('txt') #print('LOGFILE', logfile) @@ -92,10 +95,20 @@ def call_gpaw(cmd, logfile, require_converged=True): logfile = os.path.abspath(logfile) f = open(logfile, 'w') print('LOGFILE', logfile) + + # Print initial CPU and memory usage + print(f"Initial CPU usage: {psutil.cpu_percent()}%") + print(f"Initial Memory usage: {psutil.virtual_memory().percent}%") + start_time = time.monotonic() proc = subprocess.Popen(shlex.split(cmd), shell=False, stdout=f, stderr=f) proc.wait() stop_time = time.monotonic() + + # Print final CPU and memory usage + print(f"Final CPU usage: {psutil.cpu_percent()}%") + print(f"Final Memory usage: {psutil.virtual_memory().percent}%") + f.close() if proc.returncode != 0: successful = False @@ -122,6 +135,7 @@ class GPAWSinglePointSCF(FiretaskBase): optional_params = ['require_converged', 'method_description', 'nproc', 'cmd'] def run_task(self, fw_spec): + print(f"Starting SCF calculation for system: {self['system_id']}") if self.get('require_converged') is None: self['require_converged'] = True cmd, save_file, settings = setup_gpaw_cmd( @@ -136,6 +150,8 @@ def run_task(self, fw_spec): successful, update_spec, wall_time, logfile = call_gpaw( cmd, logfile, require_converged=self['require_converged'] ) + print(f"SCF calculation completed for system: {self['system_id']}, successful: {successful}") + struct = update_spec.get('struct') or self['struct'] update_spec.update({ diff --git a/orchard/pyscf_caller.py b/orchard/pyscf_caller.py index 86e047b..494c88f 100644 --- a/orchard/pyscf_caller.py +++ b/orchard/pyscf_caller.py @@ -167,9 +167,10 @@ def setup_calc(atoms, settings): calc = sgx.sgx_fit(calc, auxbasis=auxbasis, pjs=pjs) calc.with_df.__dict__.update(**sgx_params) elif settings['control']['density_fit']: - calc = calc.density_fit(only_dfj=settings['control'].get('only_dfj') or False) + # calc = calc.density_fit(only_dfj=settings['control'].get('only_dfj') or False) if settings['control'].get('df_basis') is not None: - calc.with_df.auxbasis = settings['control']['df_basis'] + # calc.with_df.auxbasis = settings['control']['df_basis'] + calc.density_fit(auxbasis=settings['control']['df_basis']) if settings['calc'].get('nlc') is not None: calc.nlcgrids.level = 1 @@ -190,8 +191,26 @@ def setup_calc(atoms, settings): calc.with_dftd3.xc = d3xc elif settings['control'].get('dftd4'): import dftd4.pyscf as pyd4 + + from dftd4.parameters import get_damping_param + # Print Settings + print("\n=== DFTD4 Settings ===") + print("Calculation XC:", settings["calc"].get('xc')) + print("DFTD4 functional:", settings["control"].get("dftd4_functional")) + calc = pyd4.energy(calc) d4func = settings['control'].get('dftd4_functional') + + # Print Parameters + print("\n=== DFTD4 Parameters ===") + if d4func is not None: + print("Explicitly specified parameters:", get_damping_param(d4func)) + if settings["calc"].get('xc') is None: + print("settings['calc'].get('xc') is None") + else: + print("Default parameters:", get_damping_param(settings["calc"].get('xc'))) + print("=====================\n") + if d4func is not None: calc.with_dftd4 = pyd4.DFTD4Dispersion( calc.mol, xc=d4func.upper().replace(" ", "") diff --git a/orchard/pyscf_tasks.py b/orchard/pyscf_tasks.py index db1f4ff..127a118 100644 --- a/orchard/pyscf_tasks.py +++ b/orchard/pyscf_tasks.py @@ -54,6 +54,8 @@ class SCFCalc(FiretaskBase): optional_params = ['require_converged', 'method_description'] def run_task(self, fw_spec): + mol_id = fw_spec['_tasks'][0].get('system_id', 'Unknown_Molecule') # 09/25/24 added for debugging + print(f"Running SCF calculation for molecule ID: {mol_id}") # 09/25/24 added for debugging settings = get_pyscf_settings(self['settings']) start_time = time.monotonic() calc = pyscf_caller.setup_calc(Atoms.fromdict(self['struct']), settings) @@ -91,6 +93,7 @@ def run_task(self, fw_spec): self['system_id'], functional=self['method_name'], ) + print(f"Loading SCF calculation for molecule ID: {self['system_id']}") # 09/25/24 added for debugging hdf5file = os.path.join(load_dir, 'data.hdf5') in_file = os.path.join(load_dir, 'run_info.yaml') with open(in_file, 'r') as f: @@ -119,6 +122,11 @@ class SCFCalcFromRestart(FiretaskBase): def run_task(self, fw_spec): settings = get_pyscf_settings(self['new_settings'], default_settings=fw_spec['settings']) + # 09/25/24 modified + # keep spin 和 charge from first step dft calculation results run_info.yaml + # instead of from new_settings(which is from xyz files) + settings['mol']['spin'] = fw_spec['settings']['mol']['spin'] + settings['mol']['charge'] = fw_spec['settings']['mol']['charge'] start_time = time.monotonic() calc = pyscf_caller.setup_calc(Atoms.fromdict(fw_spec['struct']), settings) calc.kernel(dm0=fw_spec['calc'].make_rdm1()) diff --git a/orchard/scripts/__pycache__/__init__.cpython-310.pyc b/orchard/scripts/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..2c57b5d Binary files /dev/null and b/orchard/scripts/__pycache__/__init__.cpython-310.pyc differ diff --git a/orchard/scripts/__pycache__/compile_pyscf_dataset.cpython-310.pyc b/orchard/scripts/__pycache__/compile_pyscf_dataset.cpython-310.pyc new file mode 100644 index 0000000..f5111f1 Binary files /dev/null and b/orchard/scripts/__pycache__/compile_pyscf_dataset.cpython-310.pyc differ diff --git a/orchard/scripts/compile_pyscf_dataset.py b/orchard/scripts/compile_pyscf_dataset.py index 85ce8c5..326ffac 100644 --- a/orchard/scripts/compile_pyscf_dataset.py +++ b/orchard/scripts/compile_pyscf_dataset.py @@ -40,8 +40,13 @@ def get_feat_type(settings): def compile_single_system( - settings, save_file, analyzer_file, sparse_level, orbs, save_baselines + # settings, save_file, analyzer_file, sparse_level, orbs, save_baselines + settings, save_file, analyzer_file, sparse_level, orbs, save_baselines, ref_config ): + if settings == 'l': + with open(ref_config, 'r') as f: + ref_settings = yaml.load(f, Loader=yaml.CLoader) + print('ref_config', ref_config, 'ref_settings:', ref_settings) start = time.monotonic() analyzer = ElectronAnalyzer.load(analyzer_file) if sparse_level is not None: @@ -77,9 +82,18 @@ def compile_single_system( spinpol = False if settings == 'l': values = analyzer.get('ex_energy_density') - # TODO need to be able to generate reference data for range-separated exchange - # This function will fetch the range-separated exact exchange from the analysis - # values = analyzer.get_rs(omega) + ref_type = ref_settings.get('ref_type') + omega = ref_settings.get('omega') + print("omegaaaaaaaaaaaaaaaaaaaaaaaaaaaa:", omega) + term = ref_settings.get('energy_type') + if ref_type == 'e': + values = analyzer.get('ex_energy_density') + elif ref_type == 'sr' : + values = analyzer.get_rs(term, -omega) + elif ref_type == 'lr': + values = analyzer.get_rs(term, omega) + else: + raise TypeError('Invalid ref_type') weights = analyzer.grids.weights coords = analyzer.grids.coords if spinpol: @@ -94,11 +108,14 @@ def compile_single_system( 'wt' : weights } if orbs is not None: - data['dval'] = intk_to_strk(analyzer.calculate_vxc_on_mo('HF', orbs)) - # TODO need to be able to generate reference data for range-separated exchange - # This function will compute the contribution of short and long-range exchange to VXC - # data['dval'] = intk_to_strk(analyzer.calculate_vxc_on_mo('SR_HF(omega)', orbs)) - # data['dval'] = intk_to_strk(analyzer.calculate_vxc_on_mo('LR_HF(0.11)', orbs)) + if ref_type == 'e': + data['dval'] = intk_to_strk(analyzer.calculate_vxc_on_mo('HF', orbs)) + elif ref_type == 'sr': + data['dval'] = intk_to_strk(analyzer.calculate_vxc_on_mo(f'SR_HF({omega})', orbs)) + elif ref_type == 'lr': + data['dval'] = intk_to_strk(analyzer.calculate_vxc_on_mo(f'LR_HF({omega})', orbs)) + else: + raise TypeError('Invalid ref_type') data['drho_data'] = intk_to_strk(ddesc) data['eigvals'] = intk_to_strk(eigvals) if save_baselines: @@ -125,6 +142,7 @@ def compile_dataset( save_root, functional, basis, + ref_config, sparse_level=None, analysis_level=1, save_gap_data=False, @@ -171,8 +189,7 @@ def compile_dataset( print('Already exists, skipping:', mol_id) continue analyzer_file = data_dir + '/analysis_L{}.hdf5'.format(analysis_level) - args = [feat_settings, save_file, analyzer_file, - sparse_level, orbs, save_baselines] + args = [feat_settings, save_file, analyzer_file, sparse_level, orbs, save_baselines, ref_config] if make_fws: fwname = 'feature_{}_{}'.format(feat_name, mol_id) args[0] = yaml.dump(args[0], Dumper=yaml.CDumper) @@ -237,6 +254,7 @@ def main(): '--save-dir', default=None, type=str, help='override default save directory for features' ) + parser.add_argument('--ref-config', type=str, default=None, help='Path to the reference configuration YAML file') args = parser.parse_args() if args.settings_file is None or args.settings_file == '__REF__': @@ -245,6 +263,10 @@ def main(): with open(args.settings_file, 'r') as f: settings = yaml.load(f, Loader=yaml.CLoader) + # Convert ref_config to absolute path + if args.ref_config is not None and not os.path.isabs(args.ref_config): + args.ref_config = os.path.abspath(args.ref_config) + mol_id_list = load_mol_ids(args.mol_id_file) if args.mol_id_file.endswith('.yaml'): mol_id_code = args.mol_id_file[:-5] @@ -272,7 +294,8 @@ def main(): save_gap_data=args.save_gap_data, make_fws=args.make_fws, skip_existing=args.skip_existing, - save_dir=args.save_dir + save_dir=args.save_dir, + ref_config=args.ref_config ) if args.make_fws: from fireworks import LaunchPad, Firework diff --git a/orchard/workflow_utils.py b/orchard/workflow_utils.py index 8bedd15..70e7d7b 100644 --- a/orchard/workflow_utils.py +++ b/orchard/workflow_utils.py @@ -62,7 +62,11 @@ def load_rxns(rxn_list_id, rxndir=None): def read_accdb_structure(struct_id): - fname = '{}.xyz'.format(os.path.join(ACCDB_ROOT, 'Geometries', struct_id)) + # fname = '{}.xyz'.format(os.path.join(ACCDB_ROOT, 'Geometries', struct_id)) + # replace "=" "." with "_" to find the xyz files + struct_id_modified = struct_id.replace('=', '_').replace('.', '_') + # This modified id is only used inside this function, not gonna pass on to other places. The original struct_id is generally used. + fname = '{}.xyz'.format(os.path.join(ACCDB_ROOT, 'Geometries', struct_id_modified)) with open(fname, 'r') as f: #print(fname) lines = f.readlines()