Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ The configuration file is a yaml file that looks like this:
# - local: A dependency that is defined in the config file. These dependencies do not have a remote field and must correspond to either a service defined in the 'services' section or a program defined in the 'x-programs' section.
# - remote: A dependency that is defined in the devservices directory in a remote repository. These configs are automatically fetched from the remote repository and installed. Any dependency with a remote field will be treated as a remote dependency. Example: https://github.com/getsentry/snuba/blob/59a5258ccbb502827ebc1d3b1bf80c607a3301bf/devservices/config.yml#L8
# - modes: A list of modes for the service. Each mode includes a list of dependencies that are used in that mode.
# - healthcheck_timeout: Optional. The number of seconds to wait for all containers to become healthy before failing. Defaults to 180.
x-sentry-service-config:
version: 0.1
service_name: example-service
Expand Down
4 changes: 3 additions & 1 deletion devservices/commands/up.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,9 @@ def bring_up_docker_compose_services(
)
exit(1)
try:
check_all_containers_healthy(status, containers_to_check)
check_all_containers_healthy(
status, containers_to_check, timeout=service.config.healthcheck_timeout
)
except ContainerHealthcheckFailedError as e:
status.failure(str(e))
exit(1)
Expand Down
13 changes: 13 additions & 0 deletions devservices/configs/service_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from devservices.constants import CONFIG_FILE_NAME
from devservices.constants import DEVSERVICES_DIR_NAME
from devservices.constants import HEALTHCHECK_TIMEOUT
from devservices.constants import DependencyType
from devservices.exceptions import ConfigNotFoundError
from devservices.exceptions import ConfigParseError
Expand Down Expand Up @@ -40,6 +41,7 @@ class ServiceConfig:
service_name: str
dependencies: dict[str, Dependency]
modes: dict[str, list[str]]
healthcheck_timeout: int = HEALTHCHECK_TIMEOUT

def __post_init__(self) -> None:
self._validate()
Expand All @@ -59,6 +61,14 @@ def _validate(self) -> None:
if "default" not in self.modes:
raise ConfigValidationError("Default mode is required in service config")

if isinstance(self.healthcheck_timeout, bool) or (
not isinstance(self.healthcheck_timeout, int)
or self.healthcheck_timeout <= 0
):
raise ConfigValidationError(
"healthcheck_timeout must be a positive integer"
)
Comment thread
joshuarli marked this conversation as resolved.

for mode, services in self.modes.items():
if not isinstance(services, list):
raise ConfigValidationError(f"Services in mode '{mode}' must be a list")
Expand Down Expand Up @@ -142,6 +152,9 @@ def load_service_config_from_file(
service_name=service_config_data.get("service_name"),
dependencies=dependencies,
modes=service_config_data.get("modes", {}),
healthcheck_timeout=service_config_data.get("healthcheck_timeout")
if service_config_data.get("healthcheck_timeout") is not None
else HEALTHCHECK_TIMEOUT,
)

return service_config
Expand Down
4 changes: 2 additions & 2 deletions devservices/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class DependencyType(StrEnum):
DEVSERVICES_CACHE_DIR, "latest_version.txt"
)
DEVSERVICES_LATEST_VERSION_CACHE_TTL = timedelta(minutes=15)
# Healthcheck timeout set to 2 minutes to account for slow healthchecks
HEALTHCHECK_TIMEOUT = 120
# Healthcheck timeout set to 3 minutes to account for slow healthchecks
HEALTHCHECK_TIMEOUT = 180
HEALTHCHECK_INTERVAL = 5
SUPERVISOR_TIMEOUT = 10
19 changes: 14 additions & 5 deletions devservices/utils/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,25 +43,34 @@ def check_docker_daemon_running() -> None:


def check_all_containers_healthy(
status: Status, containers: list[ContainerNames]
status: Status,
containers: list[ContainerNames],
timeout: int = HEALTHCHECK_TIMEOUT,
) -> None:
"""Ensures all containers are healthy."""
status.info("Waiting for all containers to be healthy")
with concurrent.futures.ThreadPoolExecutor() as healthcheck_executor:
futures = [
healthcheck_executor.submit(wait_for_healthy, container, status)
healthcheck_executor.submit(wait_for_healthy, container, status, timeout)
for container in containers
]
for future in concurrent.futures.as_completed(futures):
future.result()


def wait_for_healthy(container: ContainerNames, status: Status) -> None:
def wait_for_healthy(
container: ContainerNames,
status: Status,
timeout: int = HEALTHCHECK_TIMEOUT,
) -> None:
"""
Polls a Docker container's health status until it becomes healthy or a timeout is reached.
"""
status.info(
f"Waiting for {container.short_name} to be healthy (timeout: {timeout}s)"
)
start = time.time()
while time.time() - start < HEALTHCHECK_TIMEOUT:
while time.time() - start < timeout:
# Run docker inspect to get the container's health status
try:
# For containers with no healthchecks, the output will be "unknown"
Expand Down Expand Up @@ -96,7 +105,7 @@ def wait_for_healthy(container: ContainerNames, status: Status) -> None:
# If not healthy, wait and try again
time.sleep(HEALTHCHECK_INTERVAL)

raise ContainerHealthcheckFailedError(container.short_name, HEALTHCHECK_TIMEOUT)
raise ContainerHealthcheckFailedError(container.short_name, timeout)


@dataclass
Expand Down
3 changes: 2 additions & 1 deletion tests/commands/test_up.py
Original file line number Diff line number Diff line change
Expand Up @@ -865,7 +865,7 @@ def test_up_docker_compose_container_healthcheck_failed(
assert "Starting clickhouse" in captured.out.strip()
assert "Starting redis" in captured.out.strip()
assert (
"Container container1 did not become healthy within 120 seconds."
"Container container1 did not become healthy within 180 seconds."
in captured.out.strip()
)

Expand Down Expand Up @@ -1376,6 +1376,7 @@ def test_up_multiple_modes_overlapping_running_service(
mock_check_all_containers_healthy.assert_called_once_with(
mock.ANY,
["container1", "container2"],
timeout=mock.ANY,
)

captured = capsys.readouterr()
Expand Down
41 changes: 41 additions & 0 deletions tests/configs/test_service_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def test_load_service_config_from_file(
for key, value in dependencies.items()
},
"modes": modes,
"healthcheck_timeout": 180,
}


Expand All @@ -131,9 +132,49 @@ def test_load_service_config_from_file_no_dependencies(tmp_path: Path) -> None:
"service_name": "example-service",
"dependencies": {},
"modes": {"default": []},
"healthcheck_timeout": 180,
}


def test_load_service_config_from_file_null_healthcheck_timeout(
tmp_path: Path,
) -> None:
config = {
"x-sentry-service-config": {
"version": 0.1,
"service_name": "example-service",
"modes": {"default": []},
"healthcheck_timeout": None,
},
"services": {},
}
create_config_file(tmp_path, config)

service_config = load_service_config_from_file(str(tmp_path))
assert service_config.healthcheck_timeout == 180


@pytest.mark.parametrize("invalid_timeout", [-1, 0, "120", True, False])
def test_load_service_config_from_file_invalid_healthcheck_timeout(
tmp_path: Path,
invalid_timeout: object,
) -> None:
config = {
"x-sentry-service-config": {
"version": 0.1,
"service_name": "example-service",
"modes": {"default": []},
"healthcheck_timeout": invalid_timeout,
},
"services": {},
}
create_config_file(tmp_path, config)

with pytest.raises(ConfigValidationError) as e:
load_service_config_from_file(str(tmp_path))
assert str(e.value) == "healthcheck_timeout must be a positive integer"


def test_load_service_config_from_file_missing_config(tmp_path: Path) -> None:
with pytest.raises(ConfigNotFoundError) as e:
load_service_config_from_file(str(tmp_path))
Expand Down
4 changes: 4 additions & 0 deletions tests/utils/test_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,10 +533,12 @@ def test_check_all_containers_healthy_success(
mock.call(
ContainerNames(name="devservices-container1", short_name="container1"),
mock_status,
180,
),
mock.call(
ContainerNames(name="devservices-container2", short_name="container2"),
mock_status,
180,
),
]
)
Expand Down Expand Up @@ -671,10 +673,12 @@ def test_check_all_containers_healthy_failure(
mock.call(
ContainerNames(name="devservices-container1", short_name="container1"),
mock_status,
180,
),
mock.call(
ContainerNames(name="devservices-container2", short_name="container2"),
mock_status,
180,
),
]
)
Loading