Skip to content

Commit bd41984

Browse files
committed
Prevent transient-storage based clusters from prompting for cluster type upon cluster reboot
1 parent 09bda4b commit bd41984

File tree

3 files changed

+63
-15
lines changed

3 files changed

+63
-15
lines changed

cm/app.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import config
22
import logging
33
import logging.config
4+
import os
45
import sys
56
from cm.clouds.cloud_config import CloudConfig
67
from cm.framework import messages
@@ -40,6 +41,10 @@ def __init__(self, **kwargs):
4041
print "Python version: ", sys.version_info[:2]
4142
self.PERSISTENT_DATA_VERSION = 3 # Current expected and generated PD version
4243
self.DEPLOYMENT_VERSION = 2
44+
# Instance persistent data file. This file gets created for
45+
# test/transient cluster types and stores the cluster config. In case
46+
# of a reboot, read the file to automatically recreate the services.
47+
self.INSTANCE_PD_FILE = '/mnt/persistent_data-current.yaml'
4348
cc = CloudConfig(app=self)
4449
# Get the type of cloud currently running on
4550
self.cloud_type = cc.get_cloud_type()
@@ -96,21 +101,30 @@ def __init__(self, **kwargs):
96101
# This enables cluster configuration to be recovered on cluster re-
97102
# instantiation
98103
self.manager = None
104+
pd = None
99105
if self.use_object_store and 'bucket_cluster' in self.config:
100-
log.debug("Getting pd.yaml")
106+
log.debug("Looking for existing cluster persistent data (PD).")
101107
validate = True if self.cloud_type == 'ec2' else False
102108
if not self.TESTFLAG and misc.get_file_from_bucket(
103109
self.cloud_interface.get_s3_connection(),
104110
self.config['bucket_cluster'],
105111
'persistent_data.yaml', 'pd.yaml',
106112
validate=validate):
107-
pd = misc.load_yaml_file('pd.yaml')
108-
self.config.user_data = misc.merge_yaml_objects(self.config.user_data, pd)
109-
self.config.user_data = misc.normalize_user_data(self, self.config.user_data)
110-
else:
111-
log.debug("Setting deployment_version to {0}".format(self.DEPLOYMENT_VERSION))
112-
# This is a new cluster so default to the current version
113-
self.config.user_data['deployment_version'] = self.DEPLOYMENT_VERSION
113+
log.debug("Loading bucket PD file pd.yaml")
114+
pd = misc.load_yaml_file('pd.yaml')
115+
# Have not found the file in the cluster bucket, look on the instance
116+
if not pd:
117+
if os.path.exists(self.INSTANCE_PD_FILE):
118+
log.debug("Loading instance PD file {0}".format(self.INSTANCE_PD_FILE))
119+
pd = misc.load_yaml_file(self.INSTANCE_PD_FILE)
120+
if pd:
121+
self.config.user_data = misc.merge_yaml_objects(self.config.user_data, pd)
122+
self.config.user_data = misc.normalize_user_data(self, self.config.user_data)
123+
else:
124+
log.debug("No PD to go by. Setting deployment_version to {0}."
125+
.format(self.DEPLOYMENT_VERSION))
126+
# This is a new cluster so default to the current deployment version
127+
self.config.user_data['deployment_version'] = self.DEPLOYMENT_VERSION
114128

115129
def startup(self):
116130
if 'role' in self.config:

cm/master.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,7 @@ def start(self):
409409
if not self.initial_cluster_type: # this can get set by _handle_old_cluster_conf_format
410410
self.initial_cluster_type = self.app.config.get('cluster_type', None)
411411
self.userdata_cluster_type = self.app.config.get('initial_cluster_type', None)
412+
self.cluster_storage_type = self.app.config.get('cluster_storage_type', None)
412413
if self.initial_cluster_type is not None:
413414
cc_detail = "Configuring a previously existing cluster of type {0}"\
414415
.format(self.initial_cluster_type)
@@ -984,6 +985,7 @@ def shutdown(self, sd_apps=True, sd_filesystems=True, sd_instances=True,
984985
# will persist so no point in polluting the list of buckets)
985986
if delete_cluster or (self.cluster_storage_type == 'transient' and not rebooting):
986987
self.delete_cluster()
988+
misc.remove(self.app.INSTANCE_PD_FILE)
987989
self.cluster_status = cluster_status.TERMINATED
988990
log.info("Cluster %s shut down at %s (uptime: %s). If not done automatically, "
989991
"manually terminate the master instance (and any remaining instances "
@@ -2357,6 +2359,7 @@ def create_cluster_config_file(self, file_name='persistent_data-current.yaml', a
23572359
cc['filesystems'] = fss
23582360
cc['services'] = svcs
23592361
cc['cluster_type'] = self.app.manager.initial_cluster_type
2362+
cc['cluster_storage_type'] = self.app.manager.cluster_storage_type
23602363
cc['cluster_name'] = self.app.config['cluster_name']
23612364
cc['placement'] = self.app.cloud_interface.get_zone()
23622365
cc['machine_image_id'] = self.app.cloud_interface.get_ami()
@@ -2382,10 +2385,15 @@ def store_cluster_config(self):
23822385
In addition, store the local Galaxy configuration files to the cluster's
23832386
bucket (do so only if they are not already there).
23842387
"""
2388+
# Create a cluster configuration file
2389+
cc_file_name = self.create_cluster_config_file()
23852390
if self.app.manager.initial_cluster_type == 'Test' or \
23862391
self.app.manager.cluster_storage_type == 'transient':
2392+
# Place the cluster configuration file in a location that persists
2393+
# across cluster reboots
2394+
misc.move(cc_file_name, self.app.INSTANCE_PD_FILE)
23872395
log.debug("This is a transient cluster; we do not create a cluster "
2388-
"bucket or store cluster configuration for this type.")
2396+
"bucket to store cluster configuration for this type.")
23892397
return
23902398
log.debug("Storing cluster configuration to cluster's bucket")
23912399
s3_conn = self.app.cloud_interface.get_s3_connection()
@@ -2397,7 +2405,6 @@ def store_cluster_config(self):
23972405
misc.create_bucket(s3_conn, self.app.config['bucket_cluster'])
23982406
# Save/update the current Galaxy cluster configuration to cluster's
23992407
# bucket
2400-
cc_file_name = self.create_cluster_config_file()
24012408
misc.save_file_to_bucket(s3_conn, self.app.config['bucket_cluster'],
24022409
'persistent_data.yaml', cc_file_name)
24032410
log.debug("Saving current instance boot script (%s) to cluster bucket "

cm/util/misc.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,6 @@ def adjust_bucket_acl(s3_conn, bucket_name, users_whose_grant_to_remove):
491491
# through the list of grants for bucket's users and the list of users
492492
# whose grant to remove and create a list of bucket grants to keep
493493
for g in bucket.get_acl().acl.grants:
494-
# log.debug("Grant -> permission: %s, user name: %s, grant type: %s" % (g.permission, g.display_name, g.type))
495494
# Public (i.e., group) permissions are kept under 'type' field
496495
# so check that first
497496
if g.type == 'Group' and 'Group' in users_whose_grant_to_remove:
@@ -567,7 +566,8 @@ def file_in_bucket_older_than_local(s3_conn, bucket_name, remote_filename, local
567566
local_filename, remote_filename, e))
568567
return True
569568
else:
570-
log.debug("Checking age of file in bucket (%s) against local file (%s) but file in bucket is None; updating file in bucket."
569+
log.debug("Checking age of file in bucket (%s) against local file (%s) "
570+
"but file in bucket is None; updating file in bucket."
571571
% (remote_filename, local_filename))
572572
return True
573573

@@ -668,6 +668,7 @@ def delete_file_from_bucket(conn, bucket_name, remote_filename):
668668
remote_filename, bucket_name, e))
669669
return False
670670

671+
671672
def update_file_in_bucket(conn, bucket_name, local_filepath):
672673
"""
673674
Updates file in bucket from its local counterpart.
@@ -689,9 +690,7 @@ def update_file_in_bucket(conn, bucket_name, local_filepath):
689690
"cluster bucket '%s' as '%s'" %
690691
(local_filepath, bucket_name,
691692
filename))
692-
save_file_to_bucket(conn,
693-
bucket_name,
694-
filename, local_filepath)
693+
save_file_to_bucket(conn, bucket_name, filename, local_filepath)
695694
else:
696695
log.debug("No instance post start script (%s)" % local_filepath)
697696
else:
@@ -981,6 +980,34 @@ def make_dir(path, owner=None):
981980
log.debug("Directory '%s' exists." % path)
982981

983982

983+
def move(source, destination):
984+
"""
985+
Move the ``source`` file to ``destination``.
986+
987+
A convenience wrapper for python's ``shutil.move`` method that simply
988+
wrapps the call in a try/catch block.
989+
"""
990+
try:
991+
log.debug('Moving file {0} to {1}'.format(source, destination))
992+
shutil.move(source, destination)
993+
except IOError, ioe:
994+
log.error("IOError moving {0} to {1}: {2}".format(source, destination, ioe))
995+
996+
997+
def remove(path):
    """
    Remove (delete) the file ``path``.

    A convenience wrapper for python's ``os.remove`` method that simply
    wraps the call in a try/except block: a failed removal (for example,
    because the file does not exist) is logged as an error and swallowed
    instead of being propagated to the caller.

    :param path: path of the file to delete
    """
    try:
        log.debug('Removing file {0}'.format(path))
        os.remove(path)
    except (IOError, OSError) as exc:
        # ``os.remove`` reports failures as OSError, so that is the exception
        # that must be caught here. The message has exactly two format
        # arguments, hence the indices {0} and {1}.
        log.error("IOError removing {0}: {1}".format(path, exc))
1009+
1010+
9841011
def add_to_etc_hosts(ip_address, hosts=[]):
9851012
"""
9861013
Add a line with the list of ``hosts`` for the given ``ip_address`` to

0 commit comments

Comments
 (0)