Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move run_mon_job to cthulhu #492

Open
wants to merge 1 commit into
base: 1.3
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 55 additions & 1 deletion cthulhu/cthulhu/manager/rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
from cthulhu.manager import config
from cthulhu.log import log
from calamari_common.types import OsdMap, SYNC_OBJECT_STR_TYPE, OSD, OSD_MAP, POOL, CLUSTER, CRUSH_NODE, CRUSH_MAP, CRUSH_RULE, CRUSH_TYPE, ServiceId,\
NotFound, SERVER
NotFound, SERVER, MON
from cthulhu.manager.user_request import SaltRequest
from dateutil.parser import parse as dateutil_parse


class RpcInterface(object):
Expand Down Expand Up @@ -429,6 +430,59 @@ def status_by_service(self, services):
return [({'running': ss.running, 'server': ss.server_state.fqdn, 'status': ss.status} if ss else None)
for ss in result]

def _get_up_mon_servers(self, fsid):
    """
    Resolve an FSID to the FQDNs of servers that report a running mon
    service for that cluster.

    Servers whose ``last_contact`` is empty are ignored; the rest are
    ordered most recently contacted first.  One FQDN is emitted per
    matching service entry.

    :param fsid: FSID of the cluster to look up
    :return: list of server FQDN strings
    """
    # Only servers we have actually heard from can be ordered by contact time
    contacted = [srv for srv in self.server_list_cluster(fsid) if srv['last_contact']]
    contacted.sort(key=lambda srv: dateutil_parse(srv['last_contact']),
                   reverse=True)

    def _is_up_mon(service):
        # The ID is decoded before anything else is inspected
        sid = ServiceId(*(service['id']))
        return service['running'] and sid.service_type == MON and sid.fsid == fsid

    return [srv['fqdn']
            for srv in contacted
            for service in srv['services']
            if _is_up_mon(service)]

def run_mon_job(self, fsid, job_cmd, job_args):
    """
    Attempt to run a Salt job on a mon server, trying each until we find one
    where the job runs (where running includes running and returning an error)

    :param fsid: FSID of the cluster whose mon servers should be tried
    :param job_cmd: Salt function name, passed through to ``LocalClient.cmd``
    :param job_args: argument list, passed through to ``LocalClient.cmd``
    :return: the result entry reported for the first mon server that responds
    :raises RuntimeError: if none of the mon servers produced a result
    """

    # TODO: in order to support radosgw-admin commands we might need to be able to identify running RGW services
    # alternatively it may be possible to run radosgw-admin on a mon node that isn't running the RGW service
    mon_fqdns = self._get_up_mon_servers(fsid)
    # NOTE(review): a reviewer asked whether self._fs_resolve(fsid)._favorite_mon
    # could be used here instead of scanning the server list -- unresolved,
    # confirm before changing.

    client = LocalClient(config.get('cthulhu', 'salt_config_path'))
    log.debug("run_mon_job: mons for %s are %s" % (fsid, mon_fqdns))
    # Try each mon in turn, in the order given by _get_up_mon_servers, and
    # return the first result we get back.
    for mon_fqdn in mon_fqdns:
        results = client.cmd(mon_fqdn, job_cmd, job_args)
        # An empty response, or one that lacks an entry for this mon, counts
        # as a failure on this server: move on to the next one instead of
        # aborting the whole loop with a KeyError.
        if results and mon_fqdn in results:
            return results[mon_fqdn]
        log.info("Failed execute mon command on %s" % mon_fqdn)

    # None of the mons gave us a result; callers are expected to turn this
    # exception into an appropriate error response.
    raise RuntimeError("No mon servers are responding")

def run_job(self, fqdn, job_cmd, job_args):
    """
    Run a Salt job on one named server and hand back that server's result.

    :param fqdn: target passed to ``LocalClient.cmd``; also the key used to
                 pick the result out of the response
    :param job_cmd: Salt function name, passed through to ``LocalClient.cmd``
    :param job_args: argument list, passed through to ``LocalClient.cmd``
    :return: the response entry stored under ``fqdn``
    :raises RuntimeError: if the response is empty
    """
    salt_client = LocalClient(config.get('cthulhu', 'salt_config_path'))
    response = salt_client.cmd(fqdn, job_cmd, job_args)
    if response:
        return response[fqdn]
    # An empty response is treated as the server not responding
    raise RuntimeError("Server '{0}' not responding".format(fqdn))


class RpcThread(gevent.greenlet.Greenlet):
"""
Expand Down
71 changes: 0 additions & 71 deletions rest-api/calamari_rest/views/remote_view_set.py

This file was deleted.

11 changes: 5 additions & 6 deletions rest-api/calamari_rest/views/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from calamari_rest.views.exceptions import ServiceUnavailable
from calamari_rest.views.paginated_mixin import PaginatedMixin
from rest_framework.permissions import IsAuthenticated
from calamari_rest.views.remote_view_set import RemoteViewSet
from calamari_rest.views.rpc_view import RPCViewSet, DataObject
from calamari_rest.permissions import IsRoleAllowed
from calamari_rest.views.crush_node import lookup_ancestry
Expand Down Expand Up @@ -871,7 +870,7 @@ def list_server(self, request, fqdn):
return Response(self._paginate(request, self._filter_by_severity(request, self.queryset.filter_by(fqdn=fqdn))))


class LogTailViewSet(RemoteViewSet):
class LogTailViewSet(RPCViewSet):
"""
A primitive remote log viewer.

Expand Down Expand Up @@ -1048,7 +1047,7 @@ def list(self, request, fsid):
return Response(self.serializer_class([DataObject(m) for m in self._get_mons(fsid)], many=True).data)


class CliViewSet(RemoteViewSet):
class CliViewSet(RPCViewSet):
"""
Access the `ceph`, `rbd`, and `radosgw-admin` CLI tools remotely.

Expand Down Expand Up @@ -1094,15 +1093,15 @@ def create(self, request, fsid):
try:
if principle == 'ceph':
command.pop(0)
result = self.run_mon_job(fsid, "ceph.ceph_command", [name, command])
result = self.client.run_mon_job(fsid, "ceph.ceph_command", [name, command])
elif principle == 'rbd':
command.pop(0)
result = self.run_mon_job(fsid, "ceph.rbd_command", [command])
result = self.client.run_mon_job(fsid, "ceph.rbd_command", [command])
elif principle == 'radosgw-admin':
raise APIException("radosgw-admin calls are not yet supported %s" % str(result))
else:
# Try the default 'ceph' target to maintain backwards compatibility
result = self.run_mon_job(fsid, "ceph.ceph_command", [name, command])
result = self.client.run_mon_job(fsid, "ceph.ceph_command", [name, command])
except Exception as ex:
raise APIException("Error in cli command: %s" % ex)

Expand Down