Skip to content

Commit

Permalink
[debug] Add logging for endpoint replica information in job monitor
Browse files (browse the repository at this point in the history)
  • Loading branch information
charlieyl committed Feb 19, 2025
1 parent 46b8ce7 commit 38a930a
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions python/fedml/computing/scheduler/comm_utils/job_monitor.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -207,6 +207,7 @@ def monitor_replicas_number():
res_to_mlops = {} # endpoint_id -> num_replica

for endpoint_detail in res_frm_db:
logging.info(f"endpoint_detail: {endpoint_detail}")
endpoint_replicas_details = {}
if isinstance(endpoint_detail, str):
endpoint_replicas_details = json.loads(endpoint_detail)
Expand All @@ -218,11 +219,13 @@ def monitor_replicas_number():
endpoint_replica_details = {}
if isinstance(endpoint_replicas_details["result"], str):
endpoint_replica_details = json.loads(endpoint_replicas_details["result"])


logging.info(f"endpoint_replica_details: {endpoint_replica_details}")
res_to_mlops[endpoint_replica_details["end_point_id"]] = res_to_mlops.get(
endpoint_replica_details["end_point_id"], 0) + 1

for endpoint_id, num_replica in res_to_mlops.items():
logging.info(f"endpoint_id: {endpoint_id}, num_replica: {num_replica}")
num_replica_url_path = "fedmlModelServer/api/v1/endpoint/replica-info"
mlops_prefix = fedml._get_backend_service()
url = f"{mlops_prefix}/{num_replica_url_path}"
Expand All @@ -240,13 +243,15 @@ def monitor_replicas_number():
"replicaNumber": int(num_replica),
"timestamp": int(time.time() * 1000)
}

logging.info(f"req_header: {req_header}")
logging.info(f"req_body: {req_body}")
try:
response = requests.post(
url,
headers=req_header,
json=req_body
)
logging.info(f"endpoint_id: {endpoint_id}, response: {response}")
if response.status_code != 200:
logging.error(f"Failed to send the replica number request to MLOps platform.")
else:
Expand Down

0 comments on commit 38a930a

Please sign in to comment.