Skip to content

Commit 1268128

Browse files
Merge branch 'develop' into librbd
2 parents 09e11d5 + 6ea5d24 commit 1268128

File tree

2 files changed

+69
-35
lines changed

2 files changed

+69
-35
lines changed

tendrl/ceph_integration/ceph.py

Lines changed: 60 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import socket
88
import struct
99
import subprocess
10+
import sys
1011
import tempfile
1112
import time
1213
import rbd
@@ -29,7 +30,7 @@
2930
# always
3031
# present, although this is the case on a nicely ceph-deploy'd system
3132
RADOS_NAME = 'client.admin'
32-
33+
SRC_DIR = '/etc/ceph'
3334

3435
def fire_event(data, tag):
3536
return {tag: data}
@@ -53,6 +54,18 @@ class AdminSocketError(MonitoringError):
5354
pass
5455

5556

57+
def get_ceph_version():
    """Return the Ceph version string reported by ``ceph --version``.

    Runs ``ceph_command(None, ['--version'])`` and returns the third
    space-separated token of the result's ``'out'`` entry.
    NOTE(review): presumably the output has the form
    ``ceph version <X.Y.Z> (<sha>)`` -- confirm against ceph_command.

    Raises:
        KeyError: the command result has no ``'out'`` entry.
        AttributeError: ``'out'`` is not a string (e.g. ``None``).
        IndexError: the output has fewer than three tokens.
        In all three cases the error is logged to stdout and re-raised.
    """
    result = ceph_command(None, ['--version'])
    try:
        return result['out'].split(' ')[2]
    except (KeyError, AttributeError, IndexError) as ex:
        # One newline-terminated entry, with the exception text separated
        # from the message, so consecutive log entries do not run together.
        sys.stdout.write("Error getting ceph --version: %s\n" % str(ex))
        # Bare ``raise`` re-raises with the original traceback intact.
        raise
66+
67+
68+
5669
def rados_command(cluster_handle, prefix, args=None, decode=True):
5770
"""Safer wrapper for ceph_argparse.json_command, which raises
5871
@@ -280,8 +293,12 @@ def rados_commands(fsid, cluster_name, commands):
280293
import rados
281294

282295
# Open a RADOS session
296+
if cluster_name is None:
297+
cluster_name = "ceph"
298+
299+
_conf_file = os.path.join(SRC_DIR, cluster_name + ".conf")
283300
cluster_handle = rados.Rados(
284-
name=RADOS_NAME, clustername=cluster_name, conffile=''
301+
name=RADOS_NAME, clustername=cluster_name, conffile=_conf_file
285302
)
286303
cluster_handle.connect()
287304

@@ -529,8 +546,12 @@ def get_cluster_object(cluster_name, sync_type):
529546
assert sync_type in SYNC_TYPES
530547

531548
# Open a RADOS session
549+
if cluster_name is None:
550+
cluster_name = "ceph"
551+
552+
_conf_file = os.path.join(SRC_DIR, cluster_name + ".conf")
532553
cluster_handle = rados.Rados(
533-
name=RADOS_NAME, clustername=cluster_name, conffile=''
554+
name=RADOS_NAME, clustername=cluster_name, conffile=_conf_file
534555
)
535556
cluster_handle.connect()
536557

@@ -698,10 +719,12 @@ def get_heartbeats():
698719
if "client" in filename:
699720
continue
700721
service_data = service_status(filename)
701-
except (rados.Error, MonitoringError):
722+
except (rados.Error, MonitoringError) as ex:
702723
# Failed to get info for this service, stale socket or
703724
# unresponsive, exclude it from report
704-
pass
725+
sys.stdout.write("Error getting ceph service status from admin "
726+
"socket %s" % filename)
727+
sys.stdout.write(str(ex))
705728
else:
706729
if not service_data:
707730
continue
@@ -713,51 +736,46 @@ def get_heartbeats():
713736
# A mon in quorum is eligible to emit a cluster heartbeat
714737
mon_sockets[service_data['fsid']] = filename
715738

716-
# Installed Ceph version (as opposed to per-service running ceph version)
717-
try:
718-
ceph_version_str = subprocess.check_output(
719-
"rpm -qa | grep ceph-[0-1]", shell=True
720-
)
721-
ceph_version_str = ceph_version_str.split("-")[1]
722-
except subprocess.CalledProcessError:
723-
ceph_version_str = None
724-
if ceph_version_str:
725-
ceph_version = ceph_version_str
726-
else:
727-
ceph_version = None
739+
ceph_version = get_ceph_version()
728740

729741
# For each ceph cluster with an in-quorum mon on this node, interrogate
730742
# the cluster
731743
cluster_heartbeat = {}
732744
for fsid, socket_path in mon_sockets.items():
745+
cluster_handle = None
733746
try:
747+
_conf_file = os.path.join(SRC_DIR, fsid_names[fsid] + ".conf")
734748
cluster_handle = rados.Rados(
735-
name=RADOS_NAME, clustername=fsid_names[fsid], conffile=''
749+
name=RADOS_NAME, clustername=fsid_names[fsid],
750+
conffile=_conf_file
736751
)
737752
cluster_handle.connect()
738753
cluster_heartbeat[fsid] = cluster_status(
739754
cluster_handle, fsid_names[fsid]
740755
)
741-
except (rados.Error, MonitoringError):
742-
# Something went wrong getting data for this cluster, exclude it
743-
# from our report
744-
pass
756+
except (rados.Error, MonitoringError) as ex:
757+
# Something went wrong getting data for this cluster
758+
sys.stdout.write("Error fetching ceph (fsid: %s) cluster maps "
759+
"from "
760+
"admin socket %s" % (fsid_names[fsid],
761+
socket_path))
762+
sys.stdout.write(str(ex))
763+
raise ex
764+
finally:
765+
if cluster_handle:
766+
cluster_handle.shutdown()
745767

746-
cluster_handle.shutdown()
747768
return ceph_version, cluster_heartbeat
748769

749770

750771
def service_status(socket_path):
751772
"""Given an admin socket path, learn all we can about that service
752773
753774
"""
754-
try:
755-
cluster_name, service_type, service_id = \
756-
re.match(
757-
"^(.+?)-(.+?)\.(.+)\.asok$",
758-
os.path.basename(socket_path)).groups()
759-
except AttributeError:
760-
return None
775+
cluster_name, service_type, service_id = \
776+
re.match(
777+
"^(.+?)-(.+?)\.(.+)\.asok$",
778+
os.path.basename(socket_path)).groups()
761779

762780
status = None
763781
# Interrogate the service for its FSID
@@ -890,11 +908,20 @@ def _heartbeat(fsid):
890908

891909

892910
def heartbeat(fsid=None):
    """Collect heartbeat data by delegating to ``_heartbeat(fsid)``.

    Args:
        fsid: passed through unchanged to ``_heartbeat``; defaults to None.

    Returns:
        ``(None, {})`` when the ``rados`` module cannot be imported (Ceph
        is not installed), otherwise whatever ``_heartbeat(fsid)`` returns.
        ``None`` is returned when ``_heartbeat`` fails with an exception
        that is not one of the re-raised types listed below.

    Raises:
        rados.Error, MonitoringError, AdminSocketError: re-raised after
            being logged to stdout; subclasses are re-raised as well.
    """
    try:
        import rados
    except ImportError:
        # Ceph isn't installed, report no services or clusters
        return None, {}

    try:
        return _heartbeat(fsid)
    except Exception as ex:
        # One newline-terminated entry, with the exception text separated
        # from the message, so consecutive log entries do not run together.
        sys.stdout.write(
            "Error getting heartbeat for ceph cluster fsid %s: %s\n"
            % (fsid, str(ex))
        )
        # isinstance (rather than an exact type() match) so that subclasses
        # of these errors are propagated too instead of silently dropped.
        if isinstance(ex, (rados.Error, MonitoringError, AdminSocketError)):
            # Bare ``raise`` re-raises with the original traceback intact.
            raise
        # Any other failure is logged only; the caller receives None.
        return None
898925

899926

900927
def json_load_byteified(file_handle):

tendrl/ceph_integration/sds_sync/__init__.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -671,10 +671,13 @@ def _get_rbds(self, pool_name):
671671
def _get_utilization_data(self):
672672
from ceph_argparse import json_command
673673
import rados
674+
_conf_file = os.path.join("/etc/ceph",
675+
NS.tendrl_context.cluster_name + ".conf")
676+
# TODO (shtripat) use ceph.ceph_command instead of rados/json_command
674677
cluster_handle = rados.Rados(
675678
name=ceph.RADOS_NAME,
676-
clustername=self.name,
677-
conffile=''
679+
clustername=NS.tendrl_context.cluster_name,
680+
conffile=_conf_file
678681
)
679682
cluster_handle.connect()
680683
prefix = 'df'
@@ -685,6 +688,7 @@ def _get_utilization_data(self):
685688
timeout=ceph.RADOS_TIMEOUT
686689
)
687690
if ret != 0:
691+
cluster_handle.shutdown()
688692
raise rados.Error(outs)
689693
else:
690694
outbuf = outbuf.replace('RAW USED', 'RAW_USED')
@@ -695,6 +699,8 @@ def _get_utilization_data(self):
695699
cluster_stat = {}
696700
pool_stat = []
697701
pool_stat_available = False
702+
cluster_handle.shutdown()
703+
698704
while index < len(lines):
699705
line = lines[index]
700706
if line == "" or line == '\n':
@@ -823,6 +829,7 @@ def _get_utilization_data(self):
823829
dict['pcnt_used'] = pool_fields[pool_pcnt_used_idx]
824830
pool_stat.append(dict)
825831
index += 1
832+
826833
return {'cluster': cluster_stat, 'pools': pool_stat}
827834

828835
def _idx_in_list(self, list, str):

0 commit comments

Comments
 (0)