diff --git a/.release-please-manifest.json b/.release-please-manifest.json index c34da872b1..2a0b4bb541 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,21 +1,21 @@ { - "cli": "0.113.1", - "api": "0.113.1", - "shared": "0.113.1", - "terraform/provider": "0.113.1", + "cli": "0.114.0", + "api": "0.114.0", + "shared": "0.114.0", + "terraform/provider": "0.114.0", "terraform/modules/happy-dns-ecs": "1.5.0", "terraform/modules/happy-env-ecs": "3.3.0", "terraform/modules/happy-env-eks": "4.12.2", "terraform/modules/happy-github-ci-role": "1.5.0", "terraform/modules/happy-route53": "1.3.0", "terraform/modules/happy-service-ecs": "2.1.0", - "terraform/modules/happy-service-eks": "3.17.2", - "terraform/modules/happy-stack-ecs": "2.1.0", - "terraform/modules/happy-stack-eks": "4.20.0", + "terraform/modules/happy-service-eks": "3.18.0", + "terraform/modules/happy-stack-ecs": "2.2.0", + "terraform/modules/happy-stack-eks": "4.21.0", "terraform/modules/happy-tfe-okta-app": "3.0.0", "terraform/modules/happy-tfe-user": "1.3.0", "terraform/modules/happy-ingress-eks": "2.9.0", - "hvm": "0.113.1", + "hvm": "0.114.0", "hapi-proto": "0.1.0", "terraform/modules/happy-cloudfront": "1.0.0", "helm-charts/charts/stack": "0.1.0" diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md index a6be65c484..3ae35a5830 100644 --- a/api/CHANGELOG.md +++ b/api/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.114.0](https://github.com/chanzuckerberg/happy/compare/api-v0.113.1...api-v0.114.0) (2023-10-24) + + +### Miscellaneous Chores + +* **api:** Synchronize happy platform versions + ## [0.113.1](https://github.com/chanzuckerberg/happy/compare/api-v0.113.0...api-v0.113.1) (2023-10-18) diff --git a/cli/CHANGELOG.md b/cli/CHANGELOG.md index 6f8f6a96bd..c122f02cfa 100644 --- a/cli/CHANGELOG.md +++ b/cli/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## [0.114.0](https://github.com/chanzuckerberg/happy/compare/cli-v0.113.1...cli-v0.114.0) (2023-10-24) + + +### Features + +* fixed batch delete happy stack ([#2613](https://github.com/chanzuckerberg/happy/issues/2613)) ([21f927b](https://github.com/chanzuckerberg/happy/commit/21f927b9ac095bb2645b6e2a51d914cbfe1a265c)) + + +### Bug Fixes + +* sync go versions ([#2635](https://github.com/chanzuckerberg/happy/issues/2635)) ([e479c13](https://github.com/chanzuckerberg/happy/commit/e479c136a1f2cf83b4e6b430097e74d5512f31ee)) + ## [0.113.1](https://github.com/chanzuckerberg/happy/compare/cli-v0.113.0...cli-v0.113.1) (2023-10-18) diff --git a/cli/COVERAGE b/cli/COVERAGE index 56664f9c2b..6d6d0c7862 100644 --- a/cli/COVERAGE +++ b/cli/COVERAGE @@ -1 +1 @@ -22.97 \ No newline at end of file +22.65 \ No newline at end of file diff --git a/cli/cmd/delete.go b/cli/cmd/delete.go index c575d3b20c..e802e52721 100644 --- a/cli/cmd/delete.go +++ b/cli/cmd/delete.go @@ -32,7 +32,16 @@ var deleteCmd = &cobra.Command{ Short: "Delete an existing stack", Long: "Delete the stack with the given name.", SilenceUsage: true, - RunE: runDelete, + RunE: func(cmd *cobra.Command, args []string) error { + for _, stackName := range args { + log.Infof("Deleting Happy Stack %s\n", stackName) + if err := runDelete(cmd, stackName); err != nil { + log.Errorf("Unable to delete Happy Stack %s: %s", stackName, err.Error()) + continue + } + } + return nil + }, PreRunE: happyCmd.Validate( cobra.MinimumNArgs(1), happyCmd.IsStackNameDNSCharset, @@ -46,115 +55,112 @@ var deleteCmd = &cobra.Command{ ), } -func runDelete(cmd *cobra.Command, args []string) error { - for _, stackName := range args { - happyClient, err := makeHappyClient(cmd, sliceName, stackName, []string{tag}, createTag) - if err != nil { - return errors.Wrap(err, "unable to initialize the happy client") - } - ctx := context.WithValue(cmd.Context(), options.DryRunKey, dryRun) - message := workspace_repo.Message(fmt.Sprintf("Happy %s Delete Stack [%s]", util.GetVersion().Version, stackName)) - err = validate( - validateGitTree(happyClient.HappyConfig.GetProjectRoot()), - validateStackNameAvailable(ctx, happyClient.StackService, stackName, force), - validateStackExists(ctx, stackName, happyClient, message), - validateTFEBackLog(ctx, happyClient.AWSBackend), - ) - if err != nil { - log.Warnf("failed one of the happy client validations %s", err.Error()) - continue - } - - stacks, err := happyClient.StackService.GetStacks(ctx) - if err != nil { - return err - } - - stack, ok := stacks[stackName] - if !ok { - return errors.Wrapf(err, "stack %s not found", stackName) - } - - // Run all necessary tasks before deletion - taskOrchestrator := orchestrator. - NewOrchestrator(). - WithHappyConfig(happyClient.HappyConfig). - WithBackend(happyClient.AWSBackend) - err = taskOrchestrator.RunTasks(ctx, stack, backend.TaskTypeDelete) - if err != nil { - if !force { - if !diagnostics.IsInteractiveContext(ctx) { - return err - } - proceed := false - prompt := &survey.Confirm{ - Message: fmt.Sprintf("Error running tasks while trying to delete %s (%s); Continue? ", stackName, err.Error()), - } - err = survey.AskOne(prompt, &proceed) - if err != nil { - return errors.Wrapf(err, "failed to ask for confirmation") - } - if !proceed { - return err - } - } - } - - hasState, err := happyClient.StackService.HasState(ctx, stackName) - if err != nil { - return errors.Wrapf(err, "unable to determine whether the stack has state") - } - - runopts := workspace_repo.Message(fmt.Sprintf("Happy %s Delete Stack [%s]", util.GetVersion().Version, stackName)) - if !hasState { - log.Info("No state found for stack, workspace will be removed") - return happyClient.StackService.Remove(ctx, stackName, runopts) - } +func runDelete(cmd *cobra.Command, stackName string) error { + happyClient, err := makeHappyClient(cmd, sliceName, stackName, []string{tag}, createTag) + if err != nil { + return errors.Wrap(err, "unable to initialize the happy client") + } + ctx := context.WithValue(cmd.Context(), options.DryRunKey, dryRun) + message := workspace_repo.Message(fmt.Sprintf("Happy %s Delete Stack [%s]", util.GetVersion().Version, stackName)) + err = validate( + validateGitTree(happyClient.HappyConfig.GetProjectRoot()), + validateStackNameAvailable(ctx, happyClient.StackService, stackName, force), + validateStackExists(ctx, stackName, happyClient, message), + validateTFEBackLog(ctx, happyClient.AWSBackend), + ) + if err != nil { + log.Warnf("failed one of the happy client validations %s", err.Error()) + return err + } - // Destroy the stack - destroySuccess := true - waitopts := options.WaitOptions{ - StackName: stackName, - Orchestrator: taskOrchestrator, - Services: happyClient.HappyConfig.GetServices(), - } + stacks, err := happyClient.StackService.GetStacks(ctx) + if err != nil { + return err + } - tfDirPath := happyClient.HappyConfig.TerraformDirectory() - happyProjectRoot := happyClient.HappyConfig.GetProjectRoot() - srcDir := filepath.Join(happyProjectRoot, tfDirPath) - if err = stack.Destroy(ctx, srcDir, waitopts, runopts); err != nil { - // log error and set a flag, but do not return - log.Errorf("Failed to destroy stack: '%s'", err) - destroySuccess = false - } + stack, ok := stacks[stackName] + if !ok { + return errors.Wrapf(err, "stack %s not found", stackName) + } - doRemoveWorkspace := false - if !destroySuccess { + // Run all necessary tasks before deletion + taskOrchestrator := orchestrator. + NewOrchestrator(). + WithHappyConfig(happyClient.HappyConfig). + WithBackend(happyClient.AWSBackend) + err = taskOrchestrator.RunTasks(ctx, stack, backend.TaskTypeDelete) + if err != nil { + if !force { if !diagnostics.IsInteractiveContext(ctx) { - return errors.Errorf("Error while destroying %s; resources might remain, aborting workspace removal in non-interactive mode.", stackName) + return err } - proceed := false - prompt := &survey.Confirm{Message: fmt.Sprintf("Error while destroying %s; resources might remain. Continue to remove workspace? ", stackName)} + prompt := &survey.Confirm{ + Message: fmt.Sprintf("Error running tasks while trying to delete %s (%s); Continue? ", stackName, err.Error()), + } err = survey.AskOne(prompt, &proceed) if err != nil { return errors.Wrapf(err, "failed to ask for confirmation") } - if !proceed { return err } + } + } + + hasState, err := happyClient.StackService.HasState(ctx, stackName) + if err != nil { + return errors.Wrapf(err, "unable to determine whether the stack has state") + } + + runopts := workspace_repo.Message(fmt.Sprintf("Happy %s Delete Stack [%s]", util.GetVersion().Version, stackName)) + if !hasState { + log.Info("No state found for stack, workspace will be removed") + return happyClient.StackService.Remove(ctx, stackName, runopts) + } + + // Destroy the stack + destroySuccess := true + waitopts := options.WaitOptions{ + StackName: stackName, + Orchestrator: taskOrchestrator, + Services: happyClient.HappyConfig.GetServices(), + } + + tfDirPath := happyClient.HappyConfig.TerraformDirectory() + happyProjectRoot := happyClient.HappyConfig.GetProjectRoot() + srcDir := filepath.Join(happyProjectRoot, tfDirPath) + if err = stack.Destroy(ctx, srcDir, waitopts, runopts); err != nil { + // log error and set a flag, but do not return + log.Errorf("Failed to destroy stack: '%s'", err) + destroySuccess = false + } + + doRemoveWorkspace := false + if !destroySuccess { + if !diagnostics.IsInteractiveContext(ctx) { + return errors.Errorf("Error while destroying %s; resources might remain, aborting workspace removal in non-interactive mode.", stackName) + } - doRemoveWorkspace = true + proceed := false + prompt := &survey.Confirm{Message: fmt.Sprintf("Error while destroying %s; resources might remain. Continue to remove workspace? ", stackName)} + err = survey.AskOne(prompt, &proceed) + if err != nil { + return errors.Wrapf(err, "failed to ask for confirmation") } - // Remove the stack from state - // TODO: are these the right error messages? - if destroySuccess || doRemoveWorkspace { - return happyClient.StackService.Remove(ctx, stackName, runopts) - } else { - log.Warnf("Stack %s was not deleted fully", stackName) + if !proceed { + return err } + + doRemoveWorkspace = true + } + + // Remove the stack from state + // TODO: are these the right error messages? + if destroySuccess || doRemoveWorkspace { + return happyClient.StackService.Remove(ctx, stackName, runopts) } + log.Warnf("Stack %s was not deleted fully", stackName) return nil } diff --git a/cli/cmd/init.go b/cli/cmd/init.go index 856423e5d1..2c5f954f2f 100644 --- a/cli/cmd/init.go +++ b/cli/cmd/init.go @@ -301,49 +301,50 @@ func validateConfigurationIntegirty(ctx context.Context, slice string, happyClie logrus.Debug("Scheduling validateConfigurationIntegirty()") return func() error { logrus.Debug("Running validateConfigurationIntegirty()") - // These services are configured in docker-compose.yml, and have their containers built - availableServices, err := happyClient.ArtifactBuilder.GetServices(ctx) + + // Happy configuration is spread across these files: + // * .happy/config.json defines environments, specifies services, slices, features and tasks + // * docker-compose.yml defines services and their build configuration + // * terraform code from .happy/terraform/envs//*.tf references services and their settings + + // All services referenced through TF code must be present in config.json. All services listed in config.json + // must be declared in docker-compose.yml and have a build section. + + // These services are configured in docker-compose.yml + composeServices, err := happyClient.ArtifactBuilder.GetAllServices(ctx) if err != nil { return errors.Wrap(err, "unable to get available services") } - // NOTE: availableServices will only contain the services that are a part of the slice. - // That means we have to iterate over those first to make sure they are all in the config.json and - // not the other way around. + // ConfigServices are configured in config.json, and are a subset of the services in docker-compose.yml. + // Every service from config.json must be present in docker-compose.yml and must have a build section. configServices := happyClient.HappyConfig.GetServices() ss := sets.NewStringSet().Add(configServices...) - for serviceName, service := range availableServices { - // ignore services that don't have a build section - // as these are not deployable services - if service.Build == nil { - continue - } - ok := ss.ContainsElement(serviceName) + for _, serviceName := range configServices { + service, ok := composeServices[serviceName] if !ok { - return errors.Errorf("service %s was referenced in docker-compose.yml, but not referenced in .happy/config.json services array", serviceName) + return errors.Errorf("service '%s' is not configured in docker-compose.yml, but referenced in your .happy/config.json services array", serviceName) + } + if service.Build == nil { + return errors.Errorf("service '%s' is not configured to be built in docker-compose.yml, but referenced in your .happy/config.json services array", serviceName) } } - // These services are referenced in terraform code for the environment + // These services are referenced in terraform code for the environment, and must be present in config.json + // (and docker-compose.yml as well -- see the check above). srcDir := filepath.Join(happyClient.HappyConfig.GetProjectRoot(), happyClient.HappyConfig.TerraformDirectory()) deployedServices, err := tf.NewTfParser().ParseServices(srcDir) if err != nil { return errors.Wrap(err, "unable to parse terraform code") } - for service := range deployedServices { - if _, ok := availableServices[service]; !ok { - return errors.Errorf("service %s is not configured in docker-compose.yml, but referenced in your terraform code", service) - } - found := false - for _, configuredService := range happyClient.HappyConfig.GetServices() { - if service == configuredService { - found = true - break - } + for serviceName := range deployedServices { + if _, ok := composeServices[serviceName]; !ok { + return errors.Errorf("service '%s' is not configured in docker-compose.yml, but referenced in your terraform code", serviceName) } + found := ss.ContainsElement(serviceName) if !found { - return errors.Errorf("service %s is not configured in ./happy/config.json, but referenced in your terraform code", service) + return errors.Errorf("service %s is not configured in ./happy/config.json, but referenced in your terraform code", serviceName) } } diff --git a/cli/pkg/artifact_builder/COVERAGE b/cli/pkg/artifact_builder/COVERAGE index 32ccc6d5b4..7388c4b58c 100644 --- a/cli/pkg/artifact_builder/COVERAGE +++ b/cli/pkg/artifact_builder/COVERAGE @@ -1 +1 @@ -58.8 \ No newline at end of file +56.7 \ No newline at end of file diff --git a/cli/pkg/artifact_builder/artifact_builder.go b/cli/pkg/artifact_builder/artifact_builder.go index fedf08d4ee..212f405639 100644 --- a/cli/pkg/artifact_builder/artifact_builder.go +++ b/cli/pkg/artifact_builder/artifact_builder.go @@ -595,3 +595,14 @@ func (ab ArtifactBuilder) GetServices(ctx context.Context) (map[string]ServiceCo } return config.Services, nil } + +func (ab ArtifactBuilder) GetAllServices(ctx context.Context) (map[string]ServiceConfig, error) { + bc := ab.config.Clone() + bc.configData = nil + bc.Profile = nil + config, err := bc.GetConfigData(ctx) + if err != nil { + return nil, errors.Wrap(err, "unable to get config data") + } + return config.Services, nil +} diff --git a/cli/pkg/artifact_builder/artifact_builder_factory.go b/cli/pkg/artifact_builder/artifact_builder_factory.go index 2a8eca0bb0..5d441d84ba 100644 --- a/cli/pkg/artifact_builder/artifact_builder_factory.go +++ b/cli/pkg/artifact_builder/artifact_builder_factory.go @@ -31,6 +31,7 @@ type ArtifactBuilderIface interface { Pull(ctx context.Context, stackName, tag string) (map[string]string, error) BuildAndPush(ctx context.Context) error GetServices(ctx context.Context) (map[string]ServiceConfig, error) + GetAllServices(ctx context.Context) (map[string]ServiceConfig, error) } func CreateArtifactBuilder(ctx context.Context) ArtifactBuilderIface { diff --git a/cli/pkg/artifact_builder/builder_config.go b/cli/pkg/artifact_builder/builder_config.go index 125ccb201d..fe7ca598ec 100644 --- a/cli/pkg/artifact_builder/builder_config.go +++ b/cli/pkg/artifact_builder/builder_config.go @@ -46,6 +46,20 @@ func NewBuilderConfig() *BuilderConfig { } } +func (b *BuilderConfig) Clone() *BuilderConfig { + return &BuilderConfig{ + composeFile: b.composeFile, + composeEnvFile: b.composeEnvFile, + dockerRepo: b.dockerRepo, + env: b.env, + StackName: b.StackName, + Profile: b.Profile, + configData: b.configData, + Executor: b.Executor, + DryRun: b.DryRun, + } +} + func (b *BuilderConfig) WithBootstrap(bootstrap *config.Bootstrap) *BuilderConfig { b.composeFile = bootstrap.DockerComposeConfigPath return b diff --git a/cli/pkg/artifact_builder/dry_run_artifact_builder.go b/cli/pkg/artifact_builder/dry_run_artifact_builder.go index 01fbd4e164..5c4ce95b21 100644 --- a/cli/pkg/artifact_builder/dry_run_artifact_builder.go +++ b/cli/pkg/artifact_builder/dry_run_artifact_builder.go @@ -96,3 +96,14 @@ func (ab *DryRunArtifactBuilder) GetServices(ctx context.Context) (map[string]Se } return config.Services, nil } + +func (ab *DryRunArtifactBuilder) GetAllServices(ctx context.Context) (map[string]ServiceConfig, error) { + bc := ab.config.Clone() + bc.configData = nil + bc.Profile = nil + config, err := bc.GetConfigData(ctx) + if err != nil { + return nil, errors.Wrap(err, "unable to get config data") + } + return config.Services, nil +} diff --git a/examples/integration_test/.happy/terraform/envs/rdev/main.tf b/examples/integration_test/.happy/terraform/envs/rdev/main.tf index 3047aba84b..28136f56aa 100644 --- a/examples/integration_test/.happy/terraform/envs/rdev/main.tf +++ b/examples/integration_test/.happy/terraform/envs/rdev/main.tf @@ -1,5 +1,5 @@ module "stack" { - source = "github.com:chanzuckerberg/happy//terraform/modules/happy-stack-eks?ref=main" + source = "git@github.com:chanzuckerberg/happy//terraform/modules/happy-stack-eks?ref=main" image_tag = var.image_tag image_tags = jsondecode(var.image_tags) @@ -16,15 +16,17 @@ module "stack" { services = { frontend = { name = "frontend" - desired_count = 1 + desired_count = 10 // the maximum number of copies of this service it can autoscale to max_count = 50 // the signal used to trigger autoscaling (ie. 50% of CPU means scale up) scaling_cpu_threshold_percentage = 50 // the port the service is running on - port = 3000 - memory = "100Mi" - cpu = "100m" + port = 3000 + memory = "500Mi" + memory_requests = "300Mi" + cpu = "500m" + cpu_requests = "500m" // an endpoint that returns a 200. Your service will not start if this endpoint is not healthy health_check_path = "/health" // oneof: INTERNAL, EXTERNAL, PRIVATE, TARGET_GROUP_ONLY, IMAGE_TEMPLATE @@ -42,8 +44,8 @@ module "stack" { // Try to always select arm since it comes with a lot of cost savings and performance // benefits and has little to no impact on developers. platform_architecture = "arm64" - scan_on_push = true - tag_mutability = false + scan_on_push = true + tag_mutability = false } } diff --git a/examples/integration_test/src/api/Dockerfile b/examples/integration_test/src/api/Dockerfile index e23a38d129..d957783247 100644 --- a/examples/integration_test/src/api/Dockerfile +++ b/examples/integration_test/src/api/Dockerfile @@ -9,11 +9,12 @@ RUN GOPROXY=direct go build -o api FROM alpine:3.9 WORKDIR /app RUN apk update && apk upgrade && apk --no-cache add curl -RUN apk add --no-cache git make gcc g++ libc-dev pkgconfig \ - libxml2-dev libxslt-dev postgresql-dev coreutils curl wget bash \ - gnupg tar linux-headers bison readline-dev readline zlib-dev \ - zlib yaml-dev autoconf ncurses-dev curl-dev apache2-dev \ - libx11-dev libffi-dev tcl-dev tk-dev openjdk8 +# # Uncomment the statement below to detect vulnerabilities +# RUN apk add --no-cache git make gcc g++ libc-dev pkgconfig \ +# libxml2-dev libxslt-dev postgresql-dev coreutils curl wget bash \ +# gnupg tar linux-headers bison readline-dev readline zlib-dev \ +# zlib yaml-dev autoconf ncurses-dev curl-dev apache2-dev \ +# libx11-dev libffi-dev tcl-dev tk-dev openjdk8 COPY --from=builder /app/api /app/ EXPOSE 3000 ENTRYPOINT ./api \ No newline at end of file diff --git a/hvm/CHANGELOG.md b/hvm/CHANGELOG.md index 9c82e8329a..f8d9ca97fd 100644 --- a/hvm/CHANGELOG.md +++ b/hvm/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.114.0](https://github.com/chanzuckerberg/happy/compare/hvm-v0.113.1...hvm-v0.114.0) (2023-10-24) + + +### Miscellaneous Chores + +* **hvm:** Synchronize happy platform versions + ## [0.113.1](https://github.com/chanzuckerberg/happy/compare/hvm-v0.113.0...hvm-v0.113.1) (2023-10-18) diff --git a/shared/CHANGELOG.md b/shared/CHANGELOG.md index 50d5f53581..40da6d4745 100644 --- a/shared/CHANGELOG.md +++ b/shared/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.114.0](https://github.com/chanzuckerberg/happy/compare/shared-v0.113.1...shared-v0.114.0) (2023-10-24) + + +### Bug Fixes + +* sync go versions ([#2635](https://github.com/chanzuckerberg/happy/issues/2635)) ([e479c13](https://github.com/chanzuckerberg/happy/commit/e479c136a1f2cf83b4e6b430097e74d5512f31ee)) + ## [0.113.1](https://github.com/chanzuckerberg/happy/compare/shared-v0.113.0...shared-v0.113.1) (2023-10-18) diff --git a/shared/client/util.go b/shared/client/util.go index 579591b290..34ef9f0d5e 100644 --- a/shared/client/util.go +++ b/shared/client/util.go @@ -21,7 +21,6 @@ func ParseResponse[T interface{}](resp *http.Response, result *T) error { if err != nil { return errors.Wrap(err, "failed to unmarshal response body") } - return nil } diff --git a/terraform/modules/happy-service-eks/CHANGELOG.md b/terraform/modules/happy-service-eks/CHANGELOG.md index 487a8bdfa7..710aceee25 100644 --- a/terraform/modules/happy-service-eks/CHANGELOG.md +++ b/terraform/modules/happy-service-eks/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [3.18.0](https://github.com/chanzuckerberg/happy/compare/happy-service-eks-v3.17.2...happy-service-eks-v3.18.0) (2023-10-24) + + +### Features + +* Enable support for pod disruption budgets and pod anti-affinity rules ([#2532](https://github.com/chanzuckerberg/happy/issues/2532)) ([71e7cd6](https://github.com/chanzuckerberg/happy/commit/71e7cd6b49aa1a3f7411fee8bf0e88c9b30df625)) + ## [3.17.2](https://github.com/chanzuckerberg/happy/compare/happy-service-eks-v3.17.1...happy-service-eks-v3.17.2) (2023-10-16) diff --git a/terraform/modules/happy-service-eks/README.md b/terraform/modules/happy-service-eks/README.md index 5463512dca..bdee453b26 100644 --- a/terraform/modules/happy-service-eks/README.md +++ b/terraform/modules/happy-service-eks/README.md @@ -35,6 +35,7 @@ | [kubernetes_deployment_v1.deployment](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/deployment_v1) | resource | | [kubernetes_horizontal_pod_autoscaler_v1.hpa](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/horizontal_pod_autoscaler_v1) | resource | | [kubernetes_manifest.this](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/manifest) | resource | +| [kubernetes_pod_disruption_budget_v1.pdb](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/pod_disruption_budget_v1) | resource | | [kubernetes_service_v1.service](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/service_v1) | resource | | [random_pet.this](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/pet) | resource | | [aws_lb.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/lb) | data source | @@ -72,6 +73,7 @@ | [initial\_delay\_seconds](#input\_initial\_delay\_seconds) | The initial delay in seconds for the liveness and readiness probes. | `number` | `30` | no | | [k8s\_namespace](#input\_k8s\_namespace) | K8S namespace for this service | `string` | n/a | yes | | [max\_count](#input\_max\_count) | The maximum number of instances of this task that should be running across our cluster | `number` | `2` | no | +| [max\_unavailable\_count](#input\_max\_unavailable\_count) | The maximum number or percentage of pods that can be unavailable during a rolling update. For example: `1` or `20%` | `string` | `"1"` | no | | [memory](#input\_memory) | Memory in megabits per pod | `string` | `"100Mi"` | no | | [memory\_requests](#input\_memory\_requests) | Memory requests per pod | `string` | `"10Mi"` | no | | [period\_seconds](#input\_period\_seconds) | The period in seconds used for the liveness and readiness probes. | `number` | `3` | no | diff --git a/terraform/modules/happy-service-eks/main.tf b/terraform/modules/happy-service-eks/main.tf index 597e3a8eff..51863d5ae5 100644 --- a/terraform/modules/happy-service-eks/main.tf +++ b/terraform/modules/happy-service-eks/main.tf @@ -3,6 +3,9 @@ data "aws_region" "current" {} locals { tags_string = join(",", [for key, val in local.routing_tags : "${key}=${val}"]) service_type = (var.routing.service_type == "PRIVATE" || var.routing.service_mesh) ? "ClusterIP" : "NodePort" + match_labels = { + app = var.routing.service_name + } labels = merge({ app = var.routing.service_name "app.kubernetes.io/name" = var.stack_name @@ -48,9 +51,7 @@ resource "kubernetes_deployment_v1" "deployment" { } selector { - match_labels = { - app = var.routing.service_name - } + match_labels = local.match_labels } template { @@ -79,9 +80,7 @@ resource "kubernetes_deployment_v1" "deployment" { spec { service_account_name = var.aws_iam.service_account_name == null ? module.iam_service_account.service_account_name : var.aws_iam.service_account_name - node_selector = merge({ - "kubernetes.io/arch" = var.gpu != null ? "amd64" : var.platform_architecture - }, var.gpu != null ? { "nvidia.com/gpu.present" = "true" } : {}, var.additional_node_selectors) + dynamic "toleration" { for_each = var.gpu != null ? [1] : [] content { @@ -99,6 +98,51 @@ resource "kubernetes_deployment_v1" "deployment" { } } + topology_spread_constraint { + max_skew = 3 + #TODO: Once min_domains are supported, uncomment line below. https://github.com/hashicorp/terraform-provider-kubernetes/issues/2292 + #min_domains = 3 + topology_key = "topology.kubernetes.io/zone" + when_unsatisfiable = "DoNotSchedule" + label_selector { + match_labels = local.match_labels + } + } + + affinity { + node_affinity { + required_during_scheduling_ignored_during_execution { + node_selector_term { + match_expressions { + key = "kubernetes.io/arch" + operator = "In" + values = [var.platform_architecture] + } + } + } + } + pod_anti_affinity { + preferred_during_scheduling_ignored_during_execution { + weight = 100 + pod_affinity_term { + topology_key = "kubernetes.io/hostname" + label_selector { + match_labels = local.match_labels + } + } + } + preferred_during_scheduling_ignored_during_execution { + weight = 100 + pod_affinity_term { + topology_key = "topology.kubernetes.io/zone" + label_selector { + match_labels = local.match_labels + } + } + } + } + } + restart_policy = "Always" container { @@ -475,3 +519,19 @@ resource "kubernetes_horizontal_pod_autoscaler_v1" "hpa" { } } } + +resource "kubernetes_pod_disruption_budget_v1" "pdb" { + count = var.routing.service_type == "IMAGE_TEMPLATE" ? 0 : 1 + metadata { + name = var.routing.service_name + namespace = var.k8s_namespace + labels = local.labels + } + + spec { + max_unavailable = var.max_unavailable_count + selector { + match_labels = local.match_labels + } + } +} diff --git a/terraform/modules/happy-service-eks/variables.tf b/terraform/modules/happy-service-eks/variables.tf index b47cfdbfdb..7d711be07e 100644 --- a/terraform/modules/happy-service-eks/variables.tf +++ b/terraform/modules/happy-service-eks/variables.tf @@ -363,4 +363,10 @@ variable "scan_on_push" { type = bool description = "Whether to enable image scan on push, disabled by default." default = false +} + +variable "max_unavailable_count" { + type = string + description = "The maximum number or percentage of pods that can be unavailable during a rolling update. For example: `1` or `20%`" + default = "1" } \ No newline at end of file diff --git a/terraform/modules/happy-stack-ecs/CHANGELOG.md b/terraform/modules/happy-stack-ecs/CHANGELOG.md index 88f37dd1c4..b119dac1ba 100644 --- a/terraform/modules/happy-stack-ecs/CHANGELOG.md +++ b/terraform/modules/happy-stack-ecs/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [2.2.0](https://github.com/chanzuckerberg/happy/compare/happy-stack-ecs-v2.1.0...happy-stack-ecs-v2.2.0) (2023-10-24) + + +### Features + +* bump happy provider version ([#2508](https://github.com/chanzuckerberg/happy/issues/2508)) ([334cb3e](https://github.com/chanzuckerberg/happy/commit/334cb3e673a1e362973fabfa268649a6baa32f5d)) + ## [2.1.0](https://github.com/chanzuckerberg/happy/compare/happy-stack-ecs-v2.0.1...happy-stack-ecs-v2.1.0) (2023-08-29) diff --git a/terraform/modules/happy-stack-ecs/README.md b/terraform/modules/happy-stack-ecs/README.md index 8c4f02b941..2cca5dc0df 100644 --- a/terraform/modules/happy-stack-ecs/README.md +++ b/terraform/modules/happy-stack-ecs/README.md @@ -6,7 +6,7 @@ | [terraform](#requirement\_terraform) | >= 1.3 | | [aws](#requirement\_aws) | >= 5.14 | | [datadog](#requirement\_datadog) | >= 3.20.0 | -| [happy](#requirement\_happy) | >= 0.97.1 | +| [happy](#requirement\_happy) | >= 0.108.0 | ## Providers @@ -14,7 +14,7 @@ |------|---------| | [aws](#provider\_aws) | >= 5.14 | | [datadog](#provider\_datadog) | >= 3.20.0 | -| [happy](#provider\_happy) | >= 0.97.1 | +| [happy](#provider\_happy) | >= 0.108.0 | ## Modules diff --git a/terraform/modules/happy-stack-ecs/versions.tf b/terraform/modules/happy-stack-ecs/versions.tf index b86391d9c3..b311dc1176 100644 --- a/terraform/modules/happy-stack-ecs/versions.tf +++ b/terraform/modules/happy-stack-ecs/versions.tf @@ -10,7 +10,7 @@ terraform { } happy = { source = "chanzuckerberg/happy" - version = ">= 0.97.1" + version = ">= 0.108.0" } } required_version = ">= 1.3" diff --git a/terraform/modules/happy-stack-eks/CHANGELOG.md b/terraform/modules/happy-stack-eks/CHANGELOG.md index 9ad4ca353a..fa07e3cc26 100644 --- a/terraform/modules/happy-stack-eks/CHANGELOG.md +++ b/terraform/modules/happy-stack-eks/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## [4.21.0](https://github.com/chanzuckerberg/happy/compare/happy-stack-eks-v4.20.0...happy-stack-eks-v4.21.0) (2023-10-24) + + +### Features + +* bump happy provider version ([#2508](https://github.com/chanzuckerberg/happy/issues/2508)) ([334cb3e](https://github.com/chanzuckerberg/happy/commit/334cb3e673a1e362973fabfa268649a6baa32f5d)) +* Enable support for pod disruption budgets and pod anti-affinity rules ([#2532](https://github.com/chanzuckerberg/happy/issues/2532)) ([71e7cd6](https://github.com/chanzuckerberg/happy/commit/71e7cd6b49aa1a3f7411fee8bf0e88c9b30df625)) + ## [4.20.0](https://github.com/chanzuckerberg/happy/compare/happy-stack-eks-v4.19.0...happy-stack-eks-v4.20.0) (2023-10-23) diff --git a/terraform/modules/happy-stack-eks/README.md b/terraform/modules/happy-stack-eks/README.md index 485583c965..de8619b331 100644 --- a/terraform/modules/happy-stack-eks/README.md +++ b/terraform/modules/happy-stack-eks/README.md @@ -6,7 +6,7 @@ | [terraform](#requirement\_terraform) | >= 1.3 | | [aws](#requirement\_aws) | >= 5.14 | | [datadog](#requirement\_datadog) | >= 3.20.0 | -| [happy](#requirement\_happy) | >= 0.53.5 | +| [happy](#requirement\_happy) | >= 0.108.0 | | [kubernetes](#requirement\_kubernetes) | >= 2.16 | | [random](#requirement\_random) | >= 3.4.3 | | [validation](#requirement\_validation) | 1.0.0 | @@ -16,7 +16,7 @@ | Name | Version | |------|---------| | [datadog](#provider\_datadog) | >= 3.20.0 | -| [happy](#provider\_happy) | >= 0.53.5 | +| [happy](#provider\_happy) | >= 0.108.0 | | [kubernetes](#provider\_kubernetes) | >= 2.16 | | [random](#provider\_random) | >= 3.4.3 | | [validation](#provider\_validation) | 1.0.0 | @@ -59,7 +59,7 @@ | [image\_tags](#input\_image\_tags) | Override image tag for each docker image | `map(string)` | `{}` | no | | [k8s\_namespace](#input\_k8s\_namespace) | K8S namespace for this stack | `string` | n/a | yes | | [routing\_method](#input\_routing\_method) | Traffic routing method for this stack. Valid options are 'DOMAIN', when every service gets a unique domain name, or a 'CONTEXT' when all services share the same domain name, and routing is done by request path. | `string` | `"DOMAIN"` | no | -| [services](#input\_services) | The services you want to deploy as part of this stack. |
map(object({
name : string,
service_type : optional(string, "INTERNAL"),
allow_mesh_services : optional(list(object({
service : optional(string, null),
stack : optional(string, null),
service_account_name : optional(string, null)
})), null),
ingress_security_groups : optional(list(string), []), // Only used for VPC service_type
alb : optional(object({
name : string,
listener_port : number,
}), null), // Only used for TARGET_GROUP_ONLY
desired_count : optional(number, 2),
max_count : optional(number, 2),
scaling_cpu_threshold_percentage : optional(number, 80),
port : optional(number, 80),
scheme : optional(string, "HTTP"),
cmd : optional(list(string), []),
args : optional(list(string), []),
image_pull_policy : optional(string, "IfNotPresent"), // Supported values: IfNotPresent, Always, Never
tag_mutability : optional(bool, true),
scan_on_push : optional(bool, false),
service_port : optional(number, null),
service_scheme : optional(string, "HTTP"),
memory : optional(string, "100Mi"),
memory_requests : optional(string, "100Mi"),
cpu : optional(string, "100m"),
cpu_requests : optional(string, "100m"),
gpu : optional(number, null), // Whole number of GPUs to request, 0 will schedule all available GPUs. Requires GPU-enabled nodes in the cluster, `k8s-device-plugin` installed, platform_architecture = "amd64", and additional_node_selectors = { "nvidia.com/gpu.present" = "true" } present.
health_check_path : optional(string, "/"),
aws_iam : optional(object({
policy_json : optional(string, ""),
service_account_name : optional(string, null),
}), {}),
path : optional(string, "/*"), // Only used for CONTEXT and TARGET_GROUP_ONLY routing
priority : optional(number, 0), // Only used for CONTEXT and TARGET_GROUP_ONLY routing
success_codes : optional(string, "200-499"),
synthetics : optional(bool, false),
initial_delay_seconds : optional(number, 30),
alb_idle_timeout : optional(number, 60) // in seconds
period_seconds : optional(number, 3),
platform_architecture : optional(string, "amd64"), // Supported values: amd64, arm64; GPU nodes are amd64 only.
additional_node_selectors : optional(map(string), {}), // For GPU use: { "nvidia.com/gpu.present" = "true" }
bypasses : optional(map(object({ // Only used for INTERNAL service_type
paths = optional(set(string), [])
methods = optional(set(string), [])
})), {})
sidecars : optional(map(object({
image : string
tag : string
port : optional(number, 80),
scheme : optional(string, "HTTP"),
memory : optional(string, "100Mi")
cpu : optional(string, "100m")
image_pull_policy : optional(string, "IfNotPresent") // Supported values: IfNotPresent, Always, Never
health_check_path : optional(string, "/")
initial_delay_seconds : optional(number, 30),
period_seconds : optional(number, 3),
})), {})
}))
| n/a | yes | +| [services](#input\_services) | The services you want to deploy as part of this stack. |
map(object({
name : string,
service_type : optional(string, "INTERNAL"),
allow_mesh_services : optional(list(object({
service : optional(string, null),
stack : optional(string, null),
service_account_name : optional(string, null)
})), null),
ingress_security_groups : optional(list(string), []), // Only used for VPC service_type
alb : optional(object({
name : string,
listener_port : number,
}), null), // Only used for TARGET_GROUP_ONLY
desired_count : optional(number, 2),
max_count : optional(number, 2),
max_unavailable_count : optional(string, "1"),
scaling_cpu_threshold_percentage : optional(number, 80),
port : optional(number, 80),
scheme : optional(string, "HTTP"),
cmd : optional(list(string), []),
args : optional(list(string), []),
image_pull_policy : optional(string, "IfNotPresent"), // Supported values: IfNotPresent, Always, Never
tag_mutability : optional(bool, true),
scan_on_push : optional(bool, false),
service_port : optional(number, null),
service_scheme : optional(string, "HTTP"),
memory : optional(string, "100Mi"),
memory_requests : optional(string, "100Mi"),
cpu : optional(string, "100m"),
cpu_requests : optional(string, "100m"),
gpu : optional(number, null), // Whole number of GPUs to request, 0 will schedule all available GPUs. Requires GPU-enabled nodes in the cluster, `k8s-device-plugin` installed, platform_architecture = "amd64", and additional_node_selectors = { "nvidia.com/gpu.present" = "true" } present.
health_check_path : optional(string, "/"),
aws_iam : optional(object({
policy_json : optional(string, ""),
service_account_name : optional(string, null),
}), {}),
path : optional(string, "/*"), // Only used for CONTEXT and TARGET_GROUP_ONLY routing
priority : optional(number, 0), // Only used for CONTEXT and TARGET_GROUP_ONLY routing
success_codes : optional(string, "200-499"),
synthetics : optional(bool, false),
initial_delay_seconds : optional(number, 30),
alb_idle_timeout : optional(number, 60) // in seconds
period_seconds : optional(number, 3),
platform_architecture : optional(string, "amd64"), // Supported values: amd64, arm64; GPU nodes are amd64 only.
additional_node_selectors : optional(map(string), {}), // For GPU use: { "nvidia.com/gpu.present" = "true" }
bypasses : optional(map(object({ // Only used for INTERNAL service_type
paths = optional(set(string), [])
methods = optional(set(string), [])
})), {})
sidecars : optional(map(object({
image : string
tag : string
port : optional(number, 80),
scheme : optional(string, "HTTP"),
memory : optional(string, "100Mi")
cpu : optional(string, "100m")
image_pull_policy : optional(string, "IfNotPresent") // Supported values: IfNotPresent, Always, Never
health_check_path : optional(string, "/")
initial_delay_seconds : optional(number, 30),
period_seconds : optional(number, 3),
})), {})
}))
| n/a | yes | | [skip\_config\_injection](#input\_skip\_config\_injection) | Skip injecting app configs into the services / tasks | `bool` | `false` | no | | [stack\_name](#input\_stack\_name) | Happy Path stack name | `string` | n/a | yes | | [stack\_prefix](#input\_stack\_prefix) | Do bucket storage paths and db schemas need to be prefixed with the stack name? (Usually '/{stack\_name}' for dev stacks, and '' for staging/prod stacks) | `string` | `""` | no | diff --git a/terraform/modules/happy-stack-eks/main.tf b/terraform/modules/happy-stack-eks/main.tf index 93fa1e60ee..526a23e5a5 100644 --- a/terraform/modules/happy-stack-eks/main.tf +++ b/terraform/modules/happy-stack-eks/main.tf @@ -167,6 +167,7 @@ module "services" { stack_name = var.stack_name desired_count = each.value.desired_count max_count = try(each.value.max_count, each.value.desired_count) + max_unavailable_count = each.value.max_unavailable_count scaling_cpu_threshold_percentage = each.value.scaling_cpu_threshold_percentage memory = each.value.memory memory_requests = each.value.memory_requests diff --git a/terraform/modules/happy-stack-eks/variables.tf b/terraform/modules/happy-stack-eks/variables.tf index 79f8b64d92..0b21c2faa5 100644 --- a/terraform/modules/happy-stack-eks/variables.tf +++ b/terraform/modules/happy-stack-eks/variables.tf @@ -58,6 +58,7 @@ variable "services" { }), null), // Only used for TARGET_GROUP_ONLY desired_count : optional(number, 2), max_count : optional(number, 2), + max_unavailable_count : optional(string, "1"), scaling_cpu_threshold_percentage : optional(number, 80), port : optional(number, 80), scheme : optional(string, "HTTP"), diff --git a/terraform/modules/happy-stack-eks/versions.tf b/terraform/modules/happy-stack-eks/versions.tf index 5c18439b31..de1cf1ed9a 100644 --- a/terraform/modules/happy-stack-eks/versions.tf +++ b/terraform/modules/happy-stack-eks/versions.tf @@ -18,7 +18,7 @@ terraform { } happy = { source = "chanzuckerberg/happy" - version = ">= 0.53.5" + version = ">= 0.108.0" } random = { source = "hashicorp/random" diff --git a/terraform/provider/CHANGELOG.md b/terraform/provider/CHANGELOG.md index 84275e7d1f..c0bcb387fc 100644 --- a/terraform/provider/CHANGELOG.md +++ b/terraform/provider/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.114.0](https://github.com/chanzuckerberg/happy/compare/terraform-provider-v0.113.1...terraform-provider-v0.114.0) (2023-10-24) + + +### Miscellaneous Chores + +* **terraform-provider:** Synchronize happy platform versions + ## [0.113.1](https://github.com/chanzuckerberg/happy/compare/terraform-provider-v0.113.0...terraform-provider-v0.113.1) (2023-10-18)