diff --git a/cthulhu/cthulhu/manager/rpc.py b/cthulhu/cthulhu/manager/rpc.py index c2cb1c89a..4785b1f53 100644 --- a/cthulhu/cthulhu/manager/rpc.py +++ b/cthulhu/cthulhu/manager/rpc.py @@ -10,8 +10,9 @@ from cthulhu.manager import config from cthulhu.log import log from calamari_common.types import OsdMap, SYNC_OBJECT_STR_TYPE, OSD, OSD_MAP, POOL, CLUSTER, CRUSH_NODE, CRUSH_MAP, CRUSH_RULE, CRUSH_TYPE, ServiceId,\ - NotFound, SERVER + NotFound, SERVER, MON from cthulhu.manager.user_request import SaltRequest +from dateutil.parser import parse as dateutil_parse class RpcInterface(object): @@ -429,6 +430,59 @@ def status_by_service(self, services): return [({'running': ss.running, 'server': ss.server_state.fqdn, 'status': ss.status} if ss else None) for ss in result] + def _get_up_mon_servers(self, fsid): + # Resolve FSID to list of mon FQDNs + servers = self.server_list_cluster(fsid) + # Sort to get most recently contacted server first; drop any + # for whom last_contact is None + servers = [s for s in servers if s['last_contact']] + servers = sorted(servers, + key=lambda t: dateutil_parse(t['last_contact']), + reverse=True) + mon_fqdns = [] + for server in servers: + for service in server['services']: + service_id = ServiceId(*(service['id'])) + if service['running'] and service_id.service_type == MON and service_id.fsid == fsid: + mon_fqdns.append(server['fqdn']) + + return mon_fqdns + + def run_mon_job(self, fsid, job_cmd, job_args): + """ + Attempt to run a Salt job on a mon server, trying each until we find one + where the job runs (where running includes running and returning an error) + """ + + # TODO: in order to support radosgw-admin commands we might need to be able to identify running RGW services + # alternatively it may be possible to run radosgw-admin on a mon node that isn't running the RGW service + mon_fqdns = self._get_up_mon_servers(fsid) + + client = LocalClient(config.get('cthulhu', 'salt_config_path')) + log.debug("run_mon_job: mons for %s are 
%s" % (fsid, mon_fqdns)) + # For each mon FQDN, try to run the job there; if we get a result return it, if we get nothing try the next one + # NB a result that reports a failed command still counts as "ran"; only an empty response moves on to the next mon. + for mon_fqdn in mon_fqdns: + results = client.cmd(mon_fqdn, job_cmd, job_args) + if results: + return results[mon_fqdn] + else: + log.info("Failed execute mon command on %s" % mon_fqdn) + + # If none of the mons responded (or there were no running mons to try), give up and raise RuntimeError + raise RuntimeError("No mon servers are responding") + + def run_job(self, fqdn, job_cmd, job_args): + """ + Attempt to run a Salt job on a specific server. + """ + client = LocalClient(config.get('cthulhu', 'salt_config_path')) + results = client.cmd(fqdn, job_cmd, job_args) + if not results: + raise RuntimeError("Server '{0}' not responding".format(fqdn)) + else: + return results[fqdn] + class RpcThread(gevent.greenlet.Greenlet): """ diff --git a/rest-api/calamari_rest/views/remote_view_set.py b/rest-api/calamari_rest/views/remote_view_set.py deleted file mode 100644 index a4dd89f6a..000000000 --- a/rest-api/calamari_rest/views/remote_view_set.py +++ /dev/null @@ -1,71 +0,0 @@ -import logging -from dateutil.parser import parse as dateutil_parse -import salt.client - -from calamari_common.config import CalamariConfig -from calamari_common.types import ServiceId, MON -from calamari_rest.views.exceptions import ServiceUnavailable -from calamari_rest.views.rpc_view import RPCViewSet - -log = logging.getLogger('django.request') -config = CalamariConfig() - - -class RemoteViewSet(RPCViewSet): - """ -A ViewSet for API resources which will run remote operations directly on the Ceph cluster, -out of band with respect to cthulhu. Useful for special cases, but don't use this for -adding new management functionality. 
- """ - def _get_up_mon_servers(self, fsid): - # Resolve FSID to list of mon FQDNs - servers = self.client.server_list_cluster(fsid) - # Sort to get most recently contacted server first; drop any - # for whom last_contact is None - servers = [s for s in servers if s['last_contact']] - servers = sorted(servers, - key=lambda t: dateutil_parse(t['last_contact']), - reverse=True) - mon_fqdns = [] - for server in servers: - for service in server['services']: - service_id = ServiceId(*(service['id'])) - if service['running'] and service_id.service_type == MON and service_id.fsid == fsid: - mon_fqdns.append(server['fqdn']) - - return mon_fqdns - - def run_mon_job(self, fsid, job_cmd, job_args): - """ - Attempt to run a Salt job on a mon server, trying each until we find one - where the job runs (where running includes running and returning an error) - """ - - # TODO: in order to support radosgw-admin commands we might need to be able to identify running RGW services - # alternatively it may be possible to run radosgw-admin on a mon node that isn't running the RGW service - mon_fqdns = self._get_up_mon_servers(fsid) - - client = salt.client.LocalClient(config.get('cthulhu', 'salt_config_path')) - log.debug("RemoteViewSet: mons for %s are %s" % (fsid, mon_fqdns)) - # For each mon FQDN, try to go get ceph/$cluster.log, if we succeed return it, if we fail try the next one - # NB this path is actually customizable in ceph as `mon_cluster_log_file` but we assume user hasn't done that. - for mon_fqdn in mon_fqdns: - results = client.cmd(mon_fqdn, job_cmd, job_args) - if results: - return results[mon_fqdn] - else: - log.info("Failed execute mon command on %s" % mon_fqdn) - - # If none of the mons gave us what we wanted, return a 503 service unavailable - raise ServiceUnavailable("No mon servers are responding") - - def run_job(self, fqdn, job_cmd, job_args): - """ - Attempt to run a Salt job on a specific server. 
- """ - client = salt.client.LocalClient(config.get('cthulhu', 'salt_config_path')) - results = client.cmd(fqdn, job_cmd, job_args) - if not results: - raise ServiceUnavailable("Server '{0}' not responding".format(fqdn)) - else: - return results[fqdn] diff --git a/rest-api/calamari_rest/views/v2.py b/rest-api/calamari_rest/views/v2.py index 56b53ce26..d924bcf81 100755 --- a/rest-api/calamari_rest/views/v2.py +++ b/rest-api/calamari_rest/views/v2.py @@ -18,7 +18,6 @@ from calamari_rest.views.exceptions import ServiceUnavailable from calamari_rest.views.paginated_mixin import PaginatedMixin from rest_framework.permissions import IsAuthenticated -from calamari_rest.views.remote_view_set import RemoteViewSet from calamari_rest.views.rpc_view import RPCViewSet, DataObject from calamari_rest.permissions import IsRoleAllowed from calamari_rest.views.crush_node import lookup_ancestry @@ -871,7 +870,7 @@ def list_server(self, request, fqdn): return Response(self._paginate(request, self._filter_by_severity(request, self.queryset.filter_by(fqdn=fqdn)))) -class LogTailViewSet(RemoteViewSet): +class LogTailViewSet(RPCViewSet): """ A primitive remote log viewer. @@ -1048,7 +1047,7 @@ def list(self, request, fsid): return Response(self.serializer_class([DataObject(m) for m in self._get_mons(fsid)], many=True).data) -class CliViewSet(RemoteViewSet): +class CliViewSet(RPCViewSet): """ Access the `ceph`, `rbd`, and `radosgw-admin` CLI tools remotely. 
@@ -1094,15 +1093,15 @@ def create(self, request, fsid): try: if principle == 'ceph': command.pop(0) - result = self.run_mon_job(fsid, "ceph.ceph_command", [name, command]) + result = self.client.run_mon_job(fsid, "ceph.ceph_command", [name, command]) elif principle == 'rbd': command.pop(0) - result = self.run_mon_job(fsid, "ceph.rbd_command", [command]) + result = self.client.run_mon_job(fsid, "ceph.rbd_command", [command]) elif principle == 'radosgw-admin': raise APIException("radosgw-admin calls are not yet supported %s" % str(result)) else: # Try the default 'ceph' target to maintain backwards compatibility - result = self.run_mon_job(fsid, "ceph.ceph_command", [name, command]) + result = self.client.run_mon_job(fsid, "ceph.ceph_command", [name, command]) except Exception as ex: raise APIException("Error in cli command: %s" % ex)