Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: split admin API into unprivileged and privileged endpoints #461

Merged
merged 1 commit into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ ifeq ($(shell test -n "$(DD_API_KEY)" || echo not-found), not-found)
endif
@echo "[*] Running ADP..."
@DD_DOGSTATSD_PORT=9191 DD_DOGSTATSD_SOCKET=/tmp/adp-dogstatsd-dgram.sock DD_DOGSTATSD_STREAM_SOCKET=/tmp/adp-dogstatsd-stream.sock \
DD_TELEMETRY_ENABLED=true DD_PROMETHEUS_LISTEN_ADDR=tcp://127.0.0.1:5101 \
DD_TELEMETRY_ENABLED=true DD_PROMETHEUS_LISTEN_ADDR=tcp://127.0.0.1:5102 \
DD_AUTH_TOKEN_FILE_PATH=/etc/datadog-agent/auth_token \
target/debug/agent-data-plane

Expand All @@ -214,7 +214,7 @@ ifeq ($(shell test -n "$(DD_API_KEY)" || echo not-found), not-found)
endif
@echo "[*] Running ADP..."
@DD_DOGSTATSD_PORT=9191 DD_DOGSTATSD_SOCKET=/tmp/adp-dogstatsd-dgram.sock DD_DOGSTATSD_STREAM_SOCKET=/tmp/adp-dogstatsd-stream.sock \
DD_TELEMETRY_ENABLED=true DD_PROMETHEUS_LISTEN_ADDR=tcp://127.0.0.1:5101 \
DD_TELEMETRY_ENABLED=true DD_PROMETHEUS_LISTEN_ADDR=tcp://127.0.0.1:5102 \
DD_AUTH_TOKEN_FILE_PATH=/etc/datadog-agent/auth_token \
target/release/agent-data-plane

Expand All @@ -225,7 +225,7 @@ run-adp-standalone: ## Runs ADP locally in standalone mode (debug)
@DD_ADP_STANDALONE_MODE=true \
DD_API_KEY=api-key-adp-standalone DD_HOSTNAME=adp-standalone \
DD_DOGSTATSD_PORT=9191 DD_DOGSTATSD_SOCKET=/tmp/adp-dogstatsd-dgram.sock DD_DOGSTATSD_STREAM_SOCKET=/tmp/adp-dogstatsd-stream.sock \
DD_TELEMETRY_ENABLED=true DD_PROMETHEUS_LISTEN_ADDR=tcp://127.0.0.1:5101 \
DD_TELEMETRY_ENABLED=true DD_PROMETHEUS_LISTEN_ADDR=tcp://127.0.0.1:5102 \
target/debug/agent-data-plane

.PHONY: run-adp-standalone-release
Expand All @@ -235,7 +235,7 @@ run-adp-standalone-release: ## Runs ADP locally in standalone mode (release)
@DD_ADP_STANDALONE_MODE=true \
DD_API_KEY=api-key-adp-standalone DD_HOSTNAME=adp-standalone \
DD_DOGSTATSD_PORT=9191 DD_DOGSTATSD_SOCKET=/tmp/adp-dogstatsd-dgram.sock DD_DOGSTATSD_STREAM_SOCKET=/tmp/adp-dogstatsd-stream.sock \
DD_TELEMETRY_ENABLED=true DD_PROMETHEUS_LISTEN_ADDR=tcp://127.0.0.1:5101 \
DD_TELEMETRY_ENABLED=true DD_PROMETHEUS_LISTEN_ADDR=tcp://127.0.0.1:5102 \
target/release/agent-data-plane

.PHONY: run-dsd-basic-udp
Expand Down Expand Up @@ -456,7 +456,7 @@ endif
@DD_API_KEY=api-key-adp-profiling DD_HOSTNAME=adp-profiling DD_DD_URL=http://127.0.0.1:9095 \
DD_ADP_STANDALONE_MODE=true \
DD_DOGSTATSD_PORT=9191 DD_DOGSTATSD_SOCKET=/tmp/adp-dogstatsd-dgram.sock DD_DOGSTATSD_STREAM_SOCKET=/tmp/adp-dogstatsd-stream.sock \
DD_TELEMETRY_ENABLED=true DD_PROMETHEUS_LISTEN_ADDR=tcp://127.0.0.1:5101 \
DD_TELEMETRY_ENABLED=true DD_PROMETHEUS_LISTEN_ADDR=tcp://127.0.0.1:5102 \
./test/ddprof/bin/ddprof --service adp --environment local --service-version $(GIT_COMMIT) \
--url unix:///var/run/datadog/apm.socket \
--inlined-functions true --timeline --upload-period 10 --preset cpu_live_heap \
Expand Down
78 changes: 61 additions & 17 deletions bin/agent-data-plane/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,19 +111,17 @@ async fn run(started: Instant, logging_api_handler: LoggingAPIHandler) -> Result
// and a Datadog Metrics destination that forwards aggregated buckets to the Datadog Platform.
let blueprint = create_topology(&configuration, env_provider, &component_registry).await?;

// Build our administrative API server.
let primary_api_listen_address = configuration
.try_get_typed("api_listen_address")
.error_context("Failed to get API listen address.")?
.unwrap_or_else(|| ListenAddress::Tcp(([0, 0, 0, 0], 5100).into()));

let remote_agent_service = new_remote_agent_service()?;

let primary_api = APIBuilder::new()
// Build our unprivileged and privileged API servers.
//
// The unprivileged API is purely for things like health checks or read-only information. The privileged API is
// meant for sensitive information or actions that require elevated permissions.
let unprivileged_api = APIBuilder::new()
.with_handler(health_registry.api_handler())
.with_handler(component_registry.api_handler())
.with_grpc_service(remote_agent_service)
.with_handler(component_registry.api_handler());

let privileged_api = APIBuilder::new()
.with_self_signed_tls()
.with_grpc_service(new_remote_agent_service())
.with_handler(logging_api_handler);

// Run memory bounds validation to ensure that we can launch the topology with our configured memory limit, if any.
Expand All @@ -137,12 +135,9 @@ async fn run(started: Instant, logging_api_handler: LoggingAPIHandler) -> Result
// Spawn the health checker.
health_registry.spawn().await?;

// Run the API server now that we've been able to launch the topology.
//
// TODO: Use something better than `pending()`... perhaps something like a more generalized
// `ComponentShutdownCoordinator` that allows for triggering and waiting for all attached tasks to signal that
// they've shutdown.
primary_api.serve(primary_api_listen_address, pending()).await?;
// Spawn both of our API servers.
spawn_unprivileged_api(&configuration, unprivileged_api).await?;
spawn_privileged_api(&configuration, privileged_api).await?;

let startup_time = started.elapsed();

Expand Down Expand Up @@ -249,10 +244,59 @@ async fn create_topology(
// When internal telemetry is enabled, expose a Prometheus scrape endpoint that the Datadog Agent will pull from.
if telemetry_enabled {
let prometheus_config = PrometheusConfiguration::from_configuration(configuration)?;
info!(
"Serving telemetry scrape endpoint on {}.",
prometheus_config.listen_address()
);

blueprint
.add_destination("internal_metrics_out", prometheus_config)?
.connect_component("internal_metrics_out", ["internal_metrics_remap"])?;
}

Ok(blueprint)
}

async fn spawn_unprivileged_api(
configuration: &GenericConfiguration, api_builder: APIBuilder,
) -> Result<(), GenericError> {
let api_listen_address = configuration
.try_get_typed("api_listen_address")
.error_context("Failed to get API listen address.")?
.unwrap_or_else(|| ListenAddress::Tcp(([0, 0, 0, 0], 5100).into()));

// TODO: Use something better than `pending()`... perhaps something like a more generalized
// `ComponentShutdownCoordinator` that allows for triggering and waiting for all attached tasks to signal that
// they've shutdown.
tokio::spawn(async move {
info!("Serving unprivileged API on {}.", api_listen_address);

if let Err(e) = api_builder.serve(api_listen_address, pending()).await {
error!("Failed to serve unprivileged API: {}", e);
}
});

Ok(())
}

async fn spawn_privileged_api(
configuration: &GenericConfiguration, api_builder: APIBuilder,
) -> Result<(), GenericError> {
let api_listen_address = configuration
.try_get_typed("secure_api_listen_address")
.error_context("Failed to get secure API listen address.")?
.unwrap_or_else(|| ListenAddress::Tcp(([0, 0, 0, 0], 5101).into()));

// TODO: Use something better than `pending()`... perhaps something like a more generalized
// `ComponentShutdownCoordinator` that allows for triggering and waiting for all attached tasks to signal that
// they've shutdown.
tokio::spawn(async move {
info!("Serving privileged API on {}.", api_listen_address);

if let Err(e) = api_builder.serve(api_listen_address, pending()).await {
error!("Failed to serve privileged API: {}", e);
}
});

Ok(())
}
2 changes: 1 addition & 1 deletion bin/correctness/ground-truth/src/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ impl TestRunner {
.with_env_var("DD_API_KEY", "dummy-api-key-correctness-testing")
.with_env_var("DD_ADP_STANDALONE_MODE", "true")
.with_env_var("DD_TELEMETRY_ENABLED", "true")
.with_env_var("DD_PROMETHEUS_LISTEN_ADDR", "tcp://0.0.0.0:5101")
.with_env_var("DD_PROMETHEUS_LISTEN_ADDR", "tcp://0.0.0.0:5102")
.with_exposed_port("tcp", 6000);

group_runner
Expand Down
15 changes: 6 additions & 9 deletions lib/saluki-app/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ use saluki_io::net::{
use tokio::select;
use tonic::{body::BoxBody, server::NamedService, service::RoutesBuilder};
use tower::Service;
use tracing::error;

/// An API builder.
///
Expand Down Expand Up @@ -140,14 +139,12 @@ impl APIBuilder {

// Wait for our shutdown signal, which we'll forward to the listener to stop accepting new connections... or
// capture any errors thrown by the listener itself.
tokio::spawn(async move {
select! {
_ = shutdown => shutdown_handle.shutdown(),
maybe_err = error_handle => if let Some(err) = maybe_err {
error!(error = ?err, "Failed to serve API connection.");
},
}
});
select! {
_ = shutdown => shutdown_handle.shutdown(),
maybe_err = error_handle => if let Some(e) = maybe_err {
return Err(GenericError::from(e))
},
}

Ok(())
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,18 @@ use tokio::time::{interval, MissedTickBehavior};
use tracing::debug;
use uuid::Uuid;

const DEFAULT_API_LISTEN_PORT: NonZeroUsize = unsafe { NonZeroUsize::new_unchecked(5100) };
// Default port advertised for the API that exposes our `RemoteAgent` gRPC service.
//
// TODO: This should really come from the binary itself, since we don't actually control the server where our
// `RemoteAgent` gRPC service is exposed from... but it would be very clunky to pass that around so we're just aligning
// the default port here _for now_.
const DEFAULT_API_LISTEN_PORT: NonZeroUsize = match NonZeroUsize::new(5101) {
    Some(port) => port,
    // Evaluated at compile time: a zero literal here fails the build rather than relying on an unchecked `unsafe`
    // constructor.
    None => panic!("default API listen port must be non-zero"),
};

/// Datadog Status and Flare Destination
///
/// Registers ADP as a remote agent to the Core Agent.
pub struct DatadogStatusFlareConfiguration {
// Identifier for this remote agent.
// NOTE(review): presumably the ID sent to the Core Agent when registering — confirm against the registration call.
id: String,

// Display name for this remote agent.
// NOTE(review): presumably the human-readable name sent alongside `id` at registration — confirm.
display_name: String,

// Port of the API server exposing the `RemoteAgent` gRPC service.
// NOTE(review): presumably initialized from `DEFAULT_API_LISTEN_PORT` and used to build the advertised API endpoint —
// confirm.
api_listen_port: NonZeroUsize,

// Client used to talk to the Core Agent's remote agent registration API.
client: RemoteAgentClient,
}

Expand Down Expand Up @@ -88,11 +88,8 @@ impl MemoryBounds for DatadogStatusFlareConfiguration {

/// Datadog Status and Flare destination.
///
/// Implements `Destination`, and periodically (re)registers with the Core Agent while running.
pub struct DatadogStatusFlare {
// Identifier for this remote agent.
// NOTE(review): presumably the `id` passed to `register_remote_agent_request` — confirm.
id: String,

// Display name for this remote agent.
// NOTE(review): presumably the `display_name` passed to `register_remote_agent_request` — confirm.
display_name: String,

// Port of the API server exposing the `RemoteAgent` gRPC service.
// NOTE(review): presumably used to build the API endpoint advertised during registration — confirm.
api_listen_port: NonZeroUsize,

// Client used to send registration requests to the Core Agent.
client: RemoteAgentClient,
}

Expand Down Expand Up @@ -132,7 +129,8 @@ impl Destination for DatadogStatusFlare {

// Time to (re)register with the Core Agent.
//
// TODO: Consider spawning the registration as a task so that the component can keep polling and not slow down the accepting of events and responding of health checks.
// TODO: Consider spawning the registration as a task so that the component can keep polling and not
// slow down the accepting of events and responding of health checks.
_ = register_agent.tick() => {
match client.register_remote_agent_request(&id, &display_name, &api_endpoint, &auth_token).await {
Ok(resp) => {
Expand Down Expand Up @@ -183,7 +181,7 @@ impl RemoteAgent for RemoteAgentImpl {
}

/// Create the RemoteAgent service.
pub fn new_remote_agent_service() -> Result<RemoteAgentServer<RemoteAgentImpl>, GenericError> {
pub fn new_remote_agent_service() -> RemoteAgentServer<RemoteAgentImpl> {
let remote_agent = RemoteAgentImpl { started: Utc::now() };
Ok(RemoteAgentServer::new(remote_agent))
RemoteAgentServer::new(remote_agent)
}
5 changes: 5 additions & 0 deletions lib/saluki-components/src/destinations/prometheus/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ impl PrometheusConfiguration {
pub fn from_configuration(config: &GenericConfiguration) -> Result<Self, GenericError> {
Ok(config.as_typed()?)
}

/// Returns the listen address for the Prometheus scrape endpoint.
pub fn listen_address(&self) -> &ListenAddress {
&self.listen_addr
}
}

#[async_trait]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ target:

# Enable internal telemetry endpoint.
DD_TELEMETRY_ENABLED: "true"
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5101
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5102

# Test out using allowing up to two concurrent requests when forwarding.
DD_FORWARDER_NUM_WORKERS: "2"
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@ blackhole:

target_metrics:
- prometheus:
uri: "http://127.0.0.1:5101/scrape"
uri: "http://127.0.0.1:5102/scrape"
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ target:

# Enable internal telemetry endpoint.
DD_TELEMETRY_ENABLED: "true"
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5101
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5102
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@ blackhole:

target_metrics:
- prometheus:
uri: "http://127.0.0.1:5101/scrape"
uri: "http://127.0.0.1:5102/scrape"
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ target:

# Enable internal telemetry endpoint.
DD_TELEMETRY_ENABLED: "true"
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5101
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5102
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,4 @@ blackhole:

target_metrics:
- prometheus:
uri: "http://127.0.0.1:5101/scrape"
uri: "http://127.0.0.1:5102/scrape"
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ target:

# Enable internal telemetry endpoint.
DD_TELEMETRY_ENABLED: "true"
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5101
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5102
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@ blackhole:

target_metrics:
- prometheus:
uri: "http://127.0.0.1:5101/scrape"
uri: "http://127.0.0.1:5102/scrape"
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ target:

# Enable internal telemetry endpoint.
DD_TELEMETRY_ENABLED: "true"
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5101
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5102
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@ blackhole:

target_metrics:
- prometheus:
uri: "http://127.0.0.1:5101/scrape"
uri: "http://127.0.0.1:5102/scrape"
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ target:

# Enable internal telemetry endpoint.
DD_TELEMETRY_ENABLED: "true"
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5101
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5102
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@ blackhole:

target_metrics:
- prometheus:
uri: "http://127.0.0.1:5101/scrape"
uri: "http://127.0.0.1:5102/scrape"
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ target:

# Enable internal telemetry endpoint.
DD_TELEMETRY_ENABLED: "true"
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5101
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5102
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,4 @@ blackhole:

target_metrics:
- prometheus:
uri: "http://127.0.0.1:5101/scrape"
uri: "http://127.0.0.1:5102/scrape"
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ target:

# Enable internal telemetry endpoint.
DD_TELEMETRY_ENABLED: "true"
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5101
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5102
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,4 @@ blackhole:

target_metrics:
- prometheus:
uri: "http://127.0.0.1:5101/scrape"
uri: "http://127.0.0.1:5102/scrape"
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ target:

# Enable internal telemetry endpoint.
DD_TELEMETRY_ENABLED: "true"
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5101
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5102
Original file line number Diff line number Diff line change
Expand Up @@ -1766,4 +1766,4 @@ blackhole:

target_metrics:
- prometheus:
uri: "http://127.0.0.1:5101/scrape"
uri: "http://127.0.0.1:5102/scrape"
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ target:

# Enable internal telemetry endpoint.
DD_TELEMETRY_ENABLED: "true"
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5101
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5102
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@ blackhole:

target_metrics:
- prometheus:
uri: "http://127.0.0.1:5101/scrape"
uri: "http://127.0.0.1:5102/scrape"
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ target:

# Enable internal telemetry endpoint.
DD_TELEMETRY_ENABLED: "true"
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5101
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5102
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,4 @@ blackhole:

target_metrics:
- prometheus:
uri: "http://127.0.0.1:5101/scrape"
uri: "http://127.0.0.1:5102/scrape"
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ target:

# Enable internal telemetry endpoint.
DD_TELEMETRY_ENABLED: "true"
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5101
DD_PROMETHEUS_LISTEN_ADDR: tcp://127.0.0.1:5102
Loading
Loading