Skip to content

Commit

Permalink
fix(mongodb): 分片集群部署关闭balancer,调整chunk大小;新增mongos对clb的操作;优化mongos自愈拿机器
Browse files Browse the repository at this point in the history
  • Loading branch information
yyhenryyy committed Feb 14, 2025
1 parent 05b6bf4 commit 484b9dd
Show file tree
Hide file tree
Showing 14 changed files with 225 additions and 65 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from backend.configuration.constants import DBType
from backend.configuration.models.dba import DBAdministrator
from backend.core import notify
from backend.db_meta.models import Cluster
from backend.db_services.dbbase.constants import IpSource
from backend.db_services.redis.autofix.enums import AutofixStatus
from backend.db_services.redis.autofix.models import RedisAutofixCore
Expand All @@ -28,39 +29,75 @@
logger = logging.getLogger("root")


def get_resource_spec(mongos_list: list, mongod_list: list) -> dict:
"""获取申请机器规格信息,采用故障机与新机器园区相对应"""
def mongos_get_resource_spec(cluster_id: int, mongos_list: list) -> dict:
    """Build the resource-request spec for the machines that replace failed mongos hosts.

    Placement rule (from the original description): if at least half of the
    remaining healthy mongos instances sit in one sub-zone, that sub-zone is
    put into ``sub_zone_ids`` and ``include_or_exclue`` is set to ``False``;
    otherwise ``sub_zone_ids`` stays empty and ``include_or_exclue`` stays
    ``True`` (any sub-zone).

    Args:
        cluster_id: primary key of the cluster whose proxy (mongos) instances are inspected.
        mongos_list: failed mongos hosts; each item is a dict that provides at
            least ``ip``, ``spec_id``, ``spec_config`` and ``city``.

    Returns:
        A dict keyed by failed host ip. Each value holds ``spec_id``,
        ``count`` (always 1), ``spec_config`` and a ``location_spec`` dict.
        Empty when ``mongos_list`` is empty.

    Raises:
        Whatever ``Cluster.objects.get`` raises when no cluster matches
        ``cluster_id`` is propagated unchanged.
    """

    # Build the lookup set once instead of rebuilding a list for every instance.
    failed_ips = {host["ip"] for host in mongos_list}

    # Number of healthy mongos instances per sub-zone: {str(sub_zone_id): count}.
    health_mongos_sub_zone = {}
    for mongos in Cluster.objects.get(id=cluster_id).proxyinstance_set.all():
        if mongos.machine.ip in failed_ips:
            continue
        zone_key = str(mongos.machine.bk_sub_zone_id)
        health_mongos_sub_zone[zone_key] = health_mongos_sub_zone.get(zone_key, 0) + 1

    # Use the instances actually counted as the denominator, so the ratio stays
    # correct (and never divides by zero) even if mongos_list and the cluster's
    # instances do not match one-to-one.
    health_mongos_number = sum(health_mongos_sub_zone.values())

    include_or_exclue = True
    sub_zone_ids = []
    for sub_zone, num in health_mongos_sub_zone.items():
        # num / health_mongos_number >= 0.5, written without float division.
        if num * 2 >= health_mongos_number:
            include_or_exclue = False
            sub_zone_ids.append(int(sub_zone))
            break

    # NOTE(review): the "include_or_exclue" key spelling is kept exactly as in
    # the original; presumably it matches the resource-pool API field - confirm.
    resource_spec = {}
    for host in mongos_list:
        resource_spec[host["ip"]] = {
            "spec_id": host["spec_id"],
            "count": 1,
            "spec_config": host["spec_config"],
            "location_spec": {
                "city": host["city"],
                "sub_zone_ids": sub_zone_ids,
                "include_or_exclue": include_or_exclue,
            },
        }
    return resource_spec


def mongo_create_ticket(cluster: RedisAutofixCore, cluster_ids: list, mongos_list: list, mongod_list: list):
"""mongodb自愈创建单据"""
"""mongodb自愈创建单据 以cluster为维度"""

# 获取dba
mongodb_dba = DBAdministrator.get_biz_db_type_admins(bk_biz_id=cluster.bk_biz_id, db_type=DBType.MongoDB.value)

# 申请机器规格信息
resource_spec = get_resource_spec(mongos_list, mongod_list)
if not resource_spec:
return
resource_spec = {}

# 集群类型
if mongos_list:
cluster_type = mongos_list[0]["cluster_type"]
# mongos的资源规格
mongos_resource_spec = mongos_get_resource_spec(cluster_ids[0], mongos_list)
resource_spec.update(mongos_resource_spec)
if mongod_list:
cluster_type = mongod_list[0]["cluster_type"]
# mongodb的资源规格 TODO
# mongod_resource_spec = mongod_get_resource_spec(cluster_ids[0], mongod_list)
# resource_spec.update(mongod_resource_spec)

if not resource_spec:
return

# 单据信息
details = {
"ip_source": IpSource.RESOURCE_POOL.value,
Expand Down
11 changes: 9 additions & 2 deletions dbm-ui/backend/db_services/plugin/nameservice/clb.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from backend.configuration.models import DBAdministrator
from backend.db_meta import api
from backend.db_meta.enums import ClusterEntryType
from backend.db_meta.enums.cluster_type import ClusterType
from backend.db_meta.models import Cluster, ClusterEntry
from backend.env import CLB_DOMAIN
from backend.flow.utils import dns_manage
Expand Down Expand Up @@ -132,6 +133,12 @@ def create_lb_and_register_target(cluster_id: int) -> Dict[str, Any]:
cluster = get_cluster_info(cluster_id=cluster_id)
immute_domain = cluster["immute_domain"]

# 获取DB类型
if cluster["cluster_type"] == ClusterType.MongoShardedCluster.value:
db_type = DBType.MongoDB.value
else:
db_type = DBType.Redis.value

# 判断clb是否已经存在
if ClusterEntryType.CLB.value in cluster["clusterentry_set"]:
message = "clb of cluster:{} has existed".format(immute_domain)
Expand All @@ -141,7 +148,7 @@ def create_lb_and_register_target(cluster_id: int) -> Dict[str, Any]:
bk_biz_id = cluster["bk_biz_id"]

# 通过bk_biz_id获取manager,backupmanager,去除admin
users = DBAdministrator().get_biz_db_type_admins(bk_biz_id=bk_biz_id, db_type=DBType.Redis)
users = DBAdministrator().get_biz_db_type_admins(bk_biz_id=bk_biz_id, db_type=db_type)
users = [user for user in users if user != "admin"]
manager = users[0]
backupmanager = users[1] if len(users) > 1 else users[0]
Expand Down Expand Up @@ -391,7 +398,7 @@ def operate_part_target(cluster_id: int, ips: list, bind: bool) -> dict:

# 获取信息
cluster = get_cluster_info(cluster_id=cluster_id)
clb = cluster["clusterentry_set"]["clb"]
clb = cluster["clusterentry_set"]["clb"][0]
clb_id = clb["clb_id"]
listener_id = clb["listener_id"]
clb_region = clb["clb_region"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,15 @@ def cluster_install_flow(self):
act_component_code=ExecAddRelationshipOperationComponent.code,
kwargs=kwargs,
)

# cluster初始配置
kwargs = self.get_kwargs.get_mongodb_cluster_init_kwargs()
pipeline.add_act(
act_name=_("MongoDB--cluster初始化设置"),
act_component_code=ExecuteDBActuatorJobComponent.code,
kwargs=kwargs,
)

# 域名写入dns
kwargs = self.get_kwargs.get_add_domain_to_dns_kwargs(cluster=True)
pipeline.add_act(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@

from django.utils.translation import ugettext as _

from backend.db_meta import api
from backend.db_meta.enums import ClusterEntryType
from backend.db_meta.enums.cluster_type import ClusterType
from backend.flow.consts import MongoDBInstanceType, MongoDBManagerUser
from backend.flow.engine.bamboo.scene.common.builder import SubBuilder
Expand All @@ -34,9 +32,10 @@
)
from backend.flow.plugins.components.collections.mongodb.mongodb_cmr_4_meta import CMRMongoDBMetaComponent
from backend.flow.plugins.components.collections.mongodb.send_media import ExecSendMediaOperationComponent
from backend.flow.plugins.components.collections.name_service.name_service import ExecNameServiceOperationComponent
from backend.flow.utils.mongodb.mongodb_dataclass import ActKwargs

from .mongos_replace import cluster_clb, mongos_operate_clb


def mongos_autofix(root_id: str, ticket_data: Optional[Dict], sub_sub_kwargs: ActKwargs, info: dict) -> SubBuilder:
"""
Expand Down Expand Up @@ -109,12 +108,9 @@ def mongos_autofix(root_id: str, ticket_data: Optional[Dict], sub_sub_kwargs: Ac
sub_sub_get_kwargs.payload["set_id"] = sub_sub_get_kwargs.db_instance["cluster_name"]

# 判断是否有clb
clb = False
if (
ClusterEntryType.CLB.value
in api.cluster.nosqlcomm.other.get_cluster_detail(cluster_id=cluster_id)[0]["clusterentry_set"]
):
clb = True
clb = cluster_clb(cluster_id=cluster_id)
creator = sub_sub_get_kwargs.payload.get("created_by")

# dbha已做clb解绑,dns删除
# # 删除clb中绑定的老ip
# if clb:
Expand Down Expand Up @@ -183,16 +179,12 @@ def mongos_autofix(root_id: str, ticket_data: Optional[Dict], sub_sub_kwargs: Ac

# clb绑定新ip
if clb:
kwargs = {
"name_service_operation_type": "clb_register_part_target",
"creator": sub_sub_get_kwargs.payload["created_by"],
"cluster_id": cluster_id,
"ips": ["{}:{}".format(node["ip"], str(sub_sub_get_kwargs.db_instance["port"]))],
}
sub_sub_pipeline.add_act(
act_name=_("MongoDB-clb绑定新ip"),
act_component_code=ExecNameServiceOperationComponent.code,
kwargs=kwargs,
mongos_operate_clb(
cluster_id=cluster_id,
creator=creator,
ips=["{}:{}".format(node["ip"], str(sub_sub_get_kwargs.db_instance["port"]))],
bind=True,
pipeline=sub_sub_pipeline,
)

# 添加新的dns
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from backend.db_meta.enums.cluster_type import ClusterType
from backend.flow.consts import MongoDBInstanceType
from backend.flow.engine.bamboo.scene.common.builder import SubBuilder
from backend.flow.engine.bamboo.scene.name_service.name_service import NameServiceFlow
from backend.flow.plugins.components.collections.mongodb.delete_domain_from_dns import (
ExecDeleteDomainFromDnsOperationComponent,
)
Expand All @@ -27,6 +28,8 @@
from backend.flow.plugins.components.collections.mongodb.mongo_shutdown_meta import MongosShutdownMetaComponent
from backend.flow.utils.mongodb.mongodb_dataclass import ActKwargs

from .mongos_replace import cluster_clb


def mongo_deinstall_parallel(sub_get_kwargs: ActKwargs, nodes: list, instance_type: str, force: bool) -> list:
acts_list = []
Expand Down Expand Up @@ -107,6 +110,17 @@ def deinstall(
)
if acts_list:
sub_pipeline.add_parallel_acts(acts_list=acts_list)
if not reduce_mongos:
# 判断是否有clb
clb = cluster_clb(cluster_id=cluster_id)
creator = sub_get_kwargs.payload.get("created_by")
if clb:
kwargs = {
"bk_biz_id": sub_get_kwargs.payload["bk_biz_id"],
"created_by": creator,
"cluster_id": cluster_id,
}
NameServiceFlow(root_id=root_id, data=kwargs).clb_delete(pipeline=sub_pipeline)

# 删除dns
kwargs = sub_get_kwargs.get_delete_domain_kwargs()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from backend.flow.utils.mongodb.mongodb_dataclass import ActKwargs

from .mongos_install import mongos_install
from .mongos_replace import cluster_clb, mongos_operate_clb


def increase_mongos(root_id: str, ticket_data: Optional[Dict], sub_kwargs: ActKwargs, info: dict) -> SubBuilder:
Expand Down Expand Up @@ -66,8 +67,9 @@ def increase_mongos(root_id: str, ticket_data: Optional[Dict], sub_kwargs: ActKw
)

# 获取信息
sub_get_kwargs.get_cluster_info_deinstall(cluster_id=info["cluster_id"])
sub_get_kwargs.payload["cluster_id"] = info["cluster_id"]
cluster_id = info["cluster_id"]
sub_get_kwargs.get_cluster_info_deinstall(cluster_id=cluster_id)
sub_get_kwargs.payload["cluster_id"] = cluster_id
sub_get_kwargs.cluster_type = sub_get_kwargs.payload["cluster_type"]
sub_get_kwargs.mongos_info = info
sub_get_kwargs.mongos_info["nodes"] = info["mongos"]
Expand All @@ -85,6 +87,9 @@ def increase_mongos(root_id: str, ticket_data: Optional[Dict], sub_kwargs: ActKw
sub_get_kwargs.payload["mongos"]["nodes"] = info["mongos"]
sub_get_kwargs.payload["mongos"]["domain"] = sub_get_kwargs.payload["mongos_nodes"][0]["domain"]
sub_get_kwargs.payload["mongos"]["port"] = sub_get_kwargs.payload["mongos_nodes"][0]["port"]
# 判断是否有clb
clb = cluster_clb(cluster_id=cluster_id)
creator = sub_get_kwargs.payload["created_by"]

# 进行mongos安装——子流程
sub_sub_pipeline = mongos_install(
Expand All @@ -95,6 +100,16 @@ def increase_mongos(root_id: str, ticket_data: Optional[Dict], sub_kwargs: ActKw
)
sub_pipeline.add_sub_pipeline(sub_sub_pipeline)

# 实例绑定clb
if clb:
mongos_operate_clb(
cluster_id=cluster_id,
creator=creator,
ips=["{}:{}".format(node["ip"], str(sub_get_kwargs.mongos_info["port"])) for node in info["mongos"]],
bind=True,
pipeline=sub_pipeline,
)

# dns新增实例
kwargs = sub_get_kwargs.get_add_domain_to_dns_kwargs(cluster=True)
sub_pipeline.add_act(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,41 @@
from backend.flow.plugins.components.collections.mongodb.exec_actuator_job import ExecuteDBActuatorJobComponent
from backend.flow.plugins.components.collections.name_service.name_service import ExecNameServiceOperationComponent
from backend.flow.utils.mongodb.mongodb_dataclass import ActKwargs
from backend.flow.utils.name_service.name_service_dataclass import TransDataKwargs


def cluster_clb(cluster_id: int) -> bool:
    """Return whether the sharded cluster has a CLB entry.

    Looks at the ``clusterentry_set`` of the first item returned by
    ``get_cluster_detail`` for ``cluster_id``.
    """

    cluster_detail = api.cluster.nosqlcomm.other.get_cluster_detail(cluster_id=cluster_id)[0]
    return ClusterEntryType.CLB.value in cluster_detail["clusterentry_set"]


def mongos_operate_clb(cluster_id: int, creator: str, ips: list, bind: bool, pipeline: SubBuilder):
    """Append one activity to ``pipeline`` that binds or unbinds mongos targets on the cluster's CLB.

    Args:
        cluster_id: id of the cluster, passed through in the activity kwargs.
        creator: user name passed through as ``creator`` in the activity kwargs.
        ips: targets to operate on; the callers in this module pass ``"ip:port"`` strings.
        bind: ``True`` selects ``clb_register_part_target``,
            ``False`` selects ``clb_deregister_part_target``.
        pipeline: builder that receives the new activity via ``add_act``.
    """

    # Keep the literal strings inside _() so they stay visible to message extraction.
    if bind:
        name_service_operation_type = "clb_register_part_target"
        act_name = _("MongoDB-clb绑定新ip")
    else:
        name_service_operation_type = "clb_deregister_part_target"
        act_name = _("MongoDB-clb解绑老ip")

    kwargs = {
        "set_trans_data_dataclass": TransDataKwargs.__name__,
        "name_service_operation_type": name_service_operation_type,
        "creator": creator,
        "cluster_id": cluster_id,
        "ips": ips,
    }
    pipeline.add_act(
        # act_name is already translated above; do not wrap it in _() a second time.
        act_name=act_name,
        act_component_code=ExecNameServiceOperationComponent.code,
        kwargs=kwargs,
    )


def mongos_replace(root_id: str, ticket_data: Optional[Dict], sub_sub_kwargs: ActKwargs, info: dict) -> SubBuilder:
Expand Down Expand Up @@ -92,25 +127,17 @@ def mongos_replace(root_id: str, ticket_data: Optional[Dict], sub_sub_kwargs: Ac
sub_sub_get_kwargs.payload["bk_cloud_id"] = info["bk_cloud_id"]
sub_sub_get_kwargs.payload["set_id"] = sub_sub_get_kwargs.db_instance["cluster_name"]
# 判断是否有clb
clb = False
if (
ClusterEntryType.CLB.value
in api.cluster.nosqlcomm.other.get_cluster_detail(cluster_id=cluster_id)[0]["clusterentry_set"]
):
clb = True
clb = cluster_clb(cluster_id=cluster_id)
creator = sub_sub_get_kwargs.payload["created_by"]

# clb解绑老ip
if clb:
kwargs = {
"name_service_operation_type": "clb_deregister_part_target",
"creator": sub_sub_get_kwargs.payload["creator"],
"cluster_id": cluster_id,
"ips": ["{}:{}".format(info["ip"], str(sub_sub_get_kwargs.db_instance["port"]))],
}
sub_sub_pipeline.add_act(
act_name=_("MongoDB-clb解绑替换ip"),
act_component_code=ExecNameServiceOperationComponent.code,
kwargs=kwargs,
mongos_operate_clb(
cluster_id=cluster_id,
creator=creator,
ips=["{}:{}".format(info["ip"], str(sub_sub_get_kwargs.db_instance["port"]))],
bind=False,
pipeline=sub_sub_pipeline,
)

# 删除老的dns
Expand All @@ -131,16 +158,12 @@ def mongos_replace(root_id: str, ticket_data: Optional[Dict], sub_sub_kwargs: Ac

# clb绑定新ip
if clb:
kwargs = {
"name_service_operation_type": "clb_register_part_target",
"creator": sub_sub_get_kwargs.payload["creator"],
"cluster_id": cluster_id,
"ips": ["{}:{}".format(node["ip"], str(sub_sub_get_kwargs.db_instance["port"]))],
}
sub_sub_pipeline.add_act(
act_name=_("MongoDB-clb绑定新ip"),
act_component_code=ExecNameServiceOperationComponent.code,
kwargs=kwargs,
mongos_operate_clb(
cluster_id=cluster_id,
creator=creator,
ips=["{}:{}".format(node["ip"], str(sub_sub_get_kwargs.db_instance["port"]))],
bind=True,
pipeline=sub_sub_pipeline,
)

# 添加新的dns
Expand Down
Loading

0 comments on commit 484b9dd

Please sign in to comment.