Skip to content

Commit

Permalink
Checking md5 of workflow and parameters files (#28)
Browse files Browse the repository at this point in the history
* Added saving md5 of workflow and parameters files inside of ExecutionState class
* Added comparison of new and previous md5's
  • Loading branch information
Claptar committed Jul 26, 2022
1 parent 9790856 commit 349041a
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 10 deletions.
4 changes: 3 additions & 1 deletion run_galaxy_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,9 @@ def main():
gi = GalaxyInstance(ins['url'], key=ins['key'])
validate_dataset_id_exists(gi, inputs_data)

state = ExecutionState.start(path=args.state_file)
state = ExecutionState.start(path=args.state_file,
workflow_path=args.workflow,
parameters_path=args.parameters)

# Create new history to run workflow
if state.input_history is None:
Expand Down
34 changes: 25 additions & 9 deletions wfexecutor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import yaml
import json
import pickle
from hashlib import md5


def get_instance(conf, name='__default'):
Expand Down Expand Up @@ -402,6 +403,7 @@ def process_allowed_errors(allowed_errors_dict, wf_from_json):

return allowed_errors_state


def produce_versions_file(gi, workflow_from_json, table_path, tools_dict=[]):
"""
Produces a tool versions file for the workflow run.
Expand Down Expand Up @@ -449,24 +451,38 @@ class ExecutionState(object):
results = None
input_history = None

def __init__(self, path):
self.path = path
def __init__(self, state_path, workflow_path, parameters_path):
self.state_path = state_path
self.workflow_hex = self.get_file_md5(workflow_path)
self.parameters_hex = self.get_file_md5(parameters_path)

@staticmethod
def get_file_md5(file_path):
with open(file_path, 'rb') as f:
return md5(f.read()).hexdigest()

@staticmethod
def start(path):
if os.path.isfile(path):
def start(state_path, workflow_path, parameters_path):
if os.path.isfile(state_path):
try:
with open(path, mode='rb') as d:
with open(state_path, mode='rb') as d:
es = pickle.load(d)
if type(es) is ExecutionState:
# Check if there are any changes in the parameters and workflow files
if ExecutionState.get_file_md5(workflow_path) != es.workflow_hex:
logging.warning(f"The workflow file's ({workflow_path}) changed from the previous run. "
"Delete .pickle file if you want to run workflow from the beginning.")
if ExecutionState.get_file_md5(parameters_path) != es.parameters_hex:
logging.warning(f"The para file's ({parameters_path}) changed from the previous run. "
"Delete .pickle file if you want to run workflow from the beginning.")
return es
else:
logging.warning("The provided file {} does not have an ExecutionState object serialised".format(path))
logging.warning("The provided file {} does not have an ExecutionState object serialised".format(state_path))
except Exception:
logging.warning("Could not read serialized file {}.".format(path))
return ExecutionState(path)
logging.warning("Could not read serialized file {}.".format(state_path))
return ExecutionState(state_path, workflow_path, parameters_path)

def save_state(self):
with open(self.path, mode='wb') as d:
with open(self.state_path, mode='wb') as d:
pickle.dump(self, d)

0 comments on commit 349041a

Please sign in to comment.