Skip to content

Commit

Permalink
feat(TPG>=5.40.0)!: Add support for RayOperator Addon (#2032)
Browse files Browse the repository at this point in the history
Co-authored-by: Gen Lu <[email protected]>
Co-authored-by: Andrew Peabody <[email protected]>
  • Loading branch information
3 people authored Aug 10, 2024
1 parent f3dc2aa commit c046af1
Show file tree
Hide file tree
Showing 52 changed files with 365 additions and 19 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ Then perform the following commands on the root folder:
| notification\_config\_topic | The desired Pub/Sub topic to which notifications will be sent by GKE. Format is projects/{project}/topics/{topic}. | `string` | `""` | no |
| notification\_filter\_event\_type | Choose what type of notifications you want to receive. If no filters are applied, you'll receive all notification types. Can be used to filter what notifications are sent. Accepted values are UPGRADE\_AVAILABLE\_EVENT, UPGRADE\_EVENT, and SECURITY\_BULLETIN\_EVENT. | `list(string)` | `[]` | no |
| project\_id | The project ID to host the cluster in (required) | `string` | n/a | yes |
| ray\_operator\_config | The Ray Operator Addon configuration for this cluster. | <pre>object({<br> enabled = bool<br> logging_enabled = optional(bool, false)<br> monitoring_enabled = optional(bool, false)<br> })</pre> | <pre>{<br> "enabled": false,<br> "logging_enabled": false,<br> "monitoring_enabled": false<br>}</pre> | no |
| region | The region to host the cluster in (optional if zonal cluster / required if regional) | `string` | `null` | no |
| regional | Whether is a regional cluster (zonal cluster if set false. WARNING: changing this after cluster creation is destructive!) | `bool` | `true` | no |
| registry\_project\_ids | Projects holding Google Container Registries. If empty, we use the cluster project. If a service account is created and the `grant_registry_access` variable is set to `true`, the `storage.objectViewer` and `artifactregistry.reader` roles are assigned on these projects. | `list(string)` | `[]` | no |
Expand Down
17 changes: 17 additions & 0 deletions autogen/main/cluster.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,23 @@ resource "google_container_cluster" "primary" {
enabled = stateful_ha_config.value.enabled
}
}

# Ray Operator addon: one block is rendered per element of
# local.ray_operator_config, and none at all when that list is empty.
dynamic "ray_operator_config" {
  for_each = local.ray_operator_config
  iterator = ray_addon

  content {
    enabled = ray_addon.value.enabled

    # Nested toggle driven by the object's logging_enabled attribute.
    ray_cluster_logging_config {
      enabled = ray_addon.value.logging_enabled
    }

    # Nested toggle driven by the object's monitoring_enabled attribute.
    ray_cluster_monitoring_config {
      enabled = ray_addon.value.monitoring_enabled
    }
  }
}

{% if beta_cluster and autopilot_cluster != true %}
istio_config {
disabled = !var.istio
Expand Down
1 change: 1 addition & 0 deletions autogen/main/main.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ locals {
gke_backup_agent_config = var.gke_backup_agent_config ? [{ enabled = true }] : [{ enabled = false }]
gcs_fuse_csi_driver_config = var.gcs_fuse_csi_driver ? [{ enabled = true }] : []
stateful_ha_config = var.stateful_ha ? [{ enabled = true }] : []
# One-element list when the Ray Operator addon is enabled, empty list otherwise
# (consumed by a dynamic block's for_each). `enabled` is a required attribute of
# the variable's object type, so it is read directly instead of via length()/lookup().
ray_operator_config = var.ray_operator_config.enabled ? [var.ray_operator_config] : []
{% if beta_cluster and autopilot_cluster != true %}
cluster_cloudrun_config_load_balancer_config = (var.cloudrun && var.cloudrun_load_balancer_type != "") ? {
load_balancer_type = var.cloudrun_load_balancer_type
Expand Down
14 changes: 14 additions & 0 deletions autogen/main/variables.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,20 @@ variable "stateful_ha" {
default = false
}

# Settings for the Ray Operator addon. `enabled` must be supplied whenever the
# object is set; the two optional flags default to false when omitted.
variable "ray_operator_config" {
  type = object({
    enabled            = bool
    logging_enabled    = optional(bool, false)
    monitoring_enabled = optional(bool, false)
  })
  description = "The Ray Operator Addon configuration for this cluster."
  default = {
    enabled            = false
    logging_enabled    = false
    monitoring_enabled = false
  }
  # An explicit null from the caller falls back to the default above instead
  # of reaching expressions that dereference this object.
  nullable = false
}

variable "timeouts" {
type = map(string)
description = "Timeout for cluster operations."
Expand Down
6 changes: 3 additions & 3 deletions autogen/main/versions.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ terraform {
required_providers {
google = {
source = "hashicorp/google"
version = ">= 5.33.0, < 6"
version = ">= 5.40.0, < 6"
}
google-beta = {
source = "hashicorp/google-beta"
version = ">= 5.33.0, < 6"
version = ">= 5.40.0, < 6"
}
kubernetes = {
source = "hashicorp/kubernetes"
Expand All @@ -46,7 +46,7 @@ terraform {
required_providers {
google = {
source = "hashicorp/google"
version = ">= 5.25.0, < 6"
version = ">= 5.40.0, < 6"
}
kubernetes = {
source = "hashicorp/kubernetes"
Expand Down
17 changes: 17 additions & 0 deletions cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,23 @@ resource "google_container_cluster" "primary" {
enabled = stateful_ha_config.value.enabled
}
}

# Ray Operator addon: one block is rendered per element of
# local.ray_operator_config, and none at all when that list is empty.
dynamic "ray_operator_config" {
  for_each = local.ray_operator_config
  iterator = ray_addon

  content {
    enabled = ray_addon.value.enabled

    # Nested toggle driven by the object's logging_enabled attribute.
    ray_cluster_logging_config {
      enabled = ray_addon.value.logging_enabled
    }

    # Nested toggle driven by the object's monitoring_enabled attribute.
    ray_cluster_monitoring_config {
      enabled = ray_addon.value.monitoring_enabled
    }
  }
}

}

datapath_provider = var.datapath_provider
Expand Down
7 changes: 6 additions & 1 deletion examples/simple_autopilot_public/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,17 @@ module "gke" {
subnetwork = local.subnet_names[index(module.gcp-network.subnets_names, local.subnet_name)]
ip_range_pods = local.pods_range_name
ip_range_services = local.svc_range_name
release_channel = "REGULAR"
release_channel = "RAPID"
enable_vertical_pod_autoscaling = true
network_tags = [local.cluster_type]
deletion_protection = false
enable_l4_ilb_subsetting = true
gcs_fuse_csi_driver = true
stateful_ha = false
gke_backup_agent_config = false
ray_operator_config = {
enabled = true
logging_enabled = true
monitoring_enabled = true
}
}
1 change: 1 addition & 0 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ locals {
gke_backup_agent_config = var.gke_backup_agent_config ? [{ enabled = true }] : [{ enabled = false }]
gcs_fuse_csi_driver_config = var.gcs_fuse_csi_driver ? [{ enabled = true }] : []
stateful_ha_config = var.stateful_ha ? [{ enabled = true }] : []
# One-element list when the Ray Operator addon is enabled, empty list otherwise
# (consumed by a dynamic block's for_each). `enabled` is a required attribute of
# the variable's object type, so it is read directly instead of via length()/lookup().
ray_operator_config = var.ray_operator_config.enabled ? [var.ray_operator_config] : []

cluster_authenticator_security_group = var.authenticator_security_group == null ? [] : [{
security_group = var.authenticator_security_group
Expand Down
1 change: 1 addition & 0 deletions modules/beta-autopilot-private-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ Then perform the following commands on the root folder:
| notification\_filter\_event\_type | Choose what type of notifications you want to receive. If no filters are applied, you'll receive all notification types. Can be used to filter what notifications are sent. Accepted values are UPGRADE\_AVAILABLE\_EVENT, UPGRADE\_EVENT, and SECURITY\_BULLETIN\_EVENT. | `list(string)` | `[]` | no |
| private\_endpoint\_subnetwork | The subnetwork to use for the hosted master network. | `string` | `null` | no |
| project\_id | The project ID to host the cluster in (required) | `string` | n/a | yes |
| ray\_operator\_config | The Ray Operator Addon configuration for this cluster. | <pre>object({<br> enabled = bool<br> logging_enabled = optional(bool, false)<br> monitoring_enabled = optional(bool, false)<br> })</pre> | <pre>{<br> "enabled": false,<br> "logging_enabled": false,<br> "monitoring_enabled": false<br>}</pre> | no |
| region | The region to host the cluster in (optional if zonal cluster / required if regional) | `string` | `null` | no |
| regional | Whether is a regional cluster (zonal cluster if set false. WARNING: changing this after cluster creation is destructive!) | `bool` | `true` | no |
| registry\_project\_ids | Projects holding Google Container Registries. If empty, we use the cluster project. If a service account is created and the `grant_registry_access` variable is set to `true`, the `storage.objectViewer` and `artifactregistry.reader` roles are assigned on these projects. | `list(string)` | `[]` | no |
Expand Down
17 changes: 17 additions & 0 deletions modules/beta-autopilot-private-cluster/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,23 @@ resource "google_container_cluster" "primary" {
enabled = stateful_ha_config.value.enabled
}
}

# Ray Operator addon: one block is rendered per element of
# local.ray_operator_config, and none at all when that list is empty.
dynamic "ray_operator_config" {
  for_each = local.ray_operator_config
  iterator = ray_addon

  content {
    enabled = ray_addon.value.enabled

    # Nested toggle driven by the object's logging_enabled attribute.
    ray_cluster_logging_config {
      enabled = ray_addon.value.logging_enabled
    }

    # Nested toggle driven by the object's monitoring_enabled attribute.
    ray_cluster_monitoring_config {
      enabled = ray_addon.value.monitoring_enabled
    }
  }
}

}

allow_net_admin = var.allow_net_admin
Expand Down
1 change: 1 addition & 0 deletions modules/beta-autopilot-private-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ locals {
gke_backup_agent_config = var.gke_backup_agent_config ? [{ enabled = true }] : [{ enabled = false }]
gcs_fuse_csi_driver_config = var.gcs_fuse_csi_driver ? [{ enabled = true }] : []
stateful_ha_config = var.stateful_ha ? [{ enabled = true }] : []
# One-element list when the Ray Operator addon is enabled, empty list otherwise
# (consumed by a dynamic block's for_each). `enabled` is a required attribute of
# the variable's object type, so it is read directly instead of via length()/lookup().
ray_operator_config = var.ray_operator_config.enabled ? [var.ray_operator_config] : []

cluster_authenticator_security_group = var.authenticator_security_group == null ? [] : [{
security_group = var.authenticator_security_group
Expand Down
14 changes: 14 additions & 0 deletions modules/beta-autopilot-private-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,20 @@ variable "stateful_ha" {
default = false
}

# Settings for the Ray Operator addon. `enabled` must be supplied whenever the
# object is set; the two optional flags default to false when omitted.
variable "ray_operator_config" {
  type = object({
    enabled            = bool
    logging_enabled    = optional(bool, false)
    monitoring_enabled = optional(bool, false)
  })
  description = "The Ray Operator Addon configuration for this cluster."
  default = {
    enabled            = false
    logging_enabled    = false
    monitoring_enabled = false
  }
  # An explicit null from the caller falls back to the default above instead
  # of reaching expressions that dereference this object.
  nullable = false
}

variable "timeouts" {
type = map(string)
description = "Timeout for cluster operations."
Expand Down
4 changes: 2 additions & 2 deletions modules/beta-autopilot-private-cluster/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ terraform {
required_providers {
google = {
source = "hashicorp/google"
version = ">= 5.33.0, < 6"
version = ">= 5.40.0, < 6"
}
google-beta = {
source = "hashicorp/google-beta"
version = ">= 5.33.0, < 6"
version = ">= 5.40.0, < 6"
}
kubernetes = {
source = "hashicorp/kubernetes"
Expand Down
1 change: 1 addition & 0 deletions modules/beta-autopilot-public-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ Then perform the following commands on the root folder:
| notification\_config\_topic | The desired Pub/Sub topic to which notifications will be sent by GKE. Format is projects/{project}/topics/{topic}. | `string` | `""` | no |
| notification\_filter\_event\_type | Choose what type of notifications you want to receive. If no filters are applied, you'll receive all notification types. Can be used to filter what notifications are sent. Accepted values are UPGRADE\_AVAILABLE\_EVENT, UPGRADE\_EVENT, and SECURITY\_BULLETIN\_EVENT. | `list(string)` | `[]` | no |
| project\_id | The project ID to host the cluster in (required) | `string` | n/a | yes |
| ray\_operator\_config | The Ray Operator Addon configuration for this cluster. | <pre>object({<br> enabled = bool<br> logging_enabled = optional(bool, false)<br> monitoring_enabled = optional(bool, false)<br> })</pre> | <pre>{<br> "enabled": false,<br> "logging_enabled": false,<br> "monitoring_enabled": false<br>}</pre> | no |
| region | The region to host the cluster in (optional if zonal cluster / required if regional) | `string` | `null` | no |
| regional | Whether is a regional cluster (zonal cluster if set false. WARNING: changing this after cluster creation is destructive!) | `bool` | `true` | no |
| registry\_project\_ids | Projects holding Google Container Registries. If empty, we use the cluster project. If a service account is created and the `grant_registry_access` variable is set to `true`, the `storage.objectViewer` and `artifactregistry.reader` roles are assigned on these projects. | `list(string)` | `[]` | no |
Expand Down
17 changes: 17 additions & 0 deletions modules/beta-autopilot-public-cluster/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,23 @@ resource "google_container_cluster" "primary" {
enabled = stateful_ha_config.value.enabled
}
}

# Ray Operator addon: one block is rendered per element of
# local.ray_operator_config, and none at all when that list is empty.
dynamic "ray_operator_config" {
  for_each = local.ray_operator_config
  iterator = ray_addon

  content {
    enabled = ray_addon.value.enabled

    # Nested toggle driven by the object's logging_enabled attribute.
    ray_cluster_logging_config {
      enabled = ray_addon.value.logging_enabled
    }

    # Nested toggle driven by the object's monitoring_enabled attribute.
    ray_cluster_monitoring_config {
      enabled = ray_addon.value.monitoring_enabled
    }
  }
}

}

allow_net_admin = var.allow_net_admin
Expand Down
1 change: 1 addition & 0 deletions modules/beta-autopilot-public-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ locals {
gke_backup_agent_config = var.gke_backup_agent_config ? [{ enabled = true }] : [{ enabled = false }]
gcs_fuse_csi_driver_config = var.gcs_fuse_csi_driver ? [{ enabled = true }] : []
stateful_ha_config = var.stateful_ha ? [{ enabled = true }] : []
# One-element list when the Ray Operator addon is enabled, empty list otherwise
# (consumed by a dynamic block's for_each). `enabled` is a required attribute of
# the variable's object type, so it is read directly instead of via length()/lookup().
ray_operator_config = var.ray_operator_config.enabled ? [var.ray_operator_config] : []

cluster_authenticator_security_group = var.authenticator_security_group == null ? [] : [{
security_group = var.authenticator_security_group
Expand Down
14 changes: 14 additions & 0 deletions modules/beta-autopilot-public-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,20 @@ variable "stateful_ha" {
default = false
}

# Settings for the Ray Operator addon. `enabled` must be supplied whenever the
# object is set; the two optional flags default to false when omitted.
variable "ray_operator_config" {
  type = object({
    enabled            = bool
    logging_enabled    = optional(bool, false)
    monitoring_enabled = optional(bool, false)
  })
  description = "The Ray Operator Addon configuration for this cluster."
  default = {
    enabled            = false
    logging_enabled    = false
    monitoring_enabled = false
  }
  # An explicit null from the caller falls back to the default above instead
  # of reaching expressions that dereference this object.
  nullable = false
}

variable "timeouts" {
type = map(string)
description = "Timeout for cluster operations."
Expand Down
4 changes: 2 additions & 2 deletions modules/beta-autopilot-public-cluster/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ terraform {
required_providers {
google = {
source = "hashicorp/google"
version = ">= 5.33.0, < 6"
version = ">= 5.40.0, < 6"
}
google-beta = {
source = "hashicorp/google-beta"
version = ">= 5.33.0, < 6"
version = ">= 5.40.0, < 6"
}
kubernetes = {
source = "hashicorp/kubernetes"
Expand Down
1 change: 1 addition & 0 deletions modules/beta-private-cluster-update-variant/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ Then perform the following commands on the root folder:
| notification\_filter\_event\_type | Choose what type of notifications you want to receive. If no filters are applied, you'll receive all notification types. Can be used to filter what notifications are sent. Accepted values are UPGRADE\_AVAILABLE\_EVENT, UPGRADE\_EVENT, and SECURITY\_BULLETIN\_EVENT. | `list(string)` | `[]` | no |
| private\_endpoint\_subnetwork | The subnetwork to use for the hosted master network. | `string` | `null` | no |
| project\_id | The project ID to host the cluster in (required) | `string` | n/a | yes |
| ray\_operator\_config | The Ray Operator Addon configuration for this cluster. | <pre>object({<br> enabled = bool<br> logging_enabled = optional(bool, false)<br> monitoring_enabled = optional(bool, false)<br> })</pre> | <pre>{<br> "enabled": false,<br> "logging_enabled": false,<br> "monitoring_enabled": false<br>}</pre> | no |
| region | The region to host the cluster in (optional if zonal cluster / required if regional) | `string` | `null` | no |
| regional | Whether is a regional cluster (zonal cluster if set false. WARNING: changing this after cluster creation is destructive!) | `bool` | `true` | no |
| registry\_project\_ids | Projects holding Google Container Registries. If empty, we use the cluster project. If a service account is created and the `grant_registry_access` variable is set to `true`, the `storage.objectViewer` and `artifactregistry.reader` roles are assigned on these projects. | `list(string)` | `[]` | no |
Expand Down
17 changes: 17 additions & 0 deletions modules/beta-private-cluster-update-variant/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,23 @@ resource "google_container_cluster" "primary" {
enabled = stateful_ha_config.value.enabled
}
}

# Ray Operator addon: one block is rendered per element of
# local.ray_operator_config, and none at all when that list is empty.
dynamic "ray_operator_config" {
  for_each = local.ray_operator_config
  iterator = ray_addon

  content {
    enabled = ray_addon.value.enabled

    # Nested toggle driven by the object's logging_enabled attribute.
    ray_cluster_logging_config {
      enabled = ray_addon.value.logging_enabled
    }

    # Nested toggle driven by the object's monitoring_enabled attribute.
    ray_cluster_monitoring_config {
      enabled = ray_addon.value.monitoring_enabled
    }
  }
}

istio_config {
disabled = !var.istio
auth = var.istio_auth
Expand Down
1 change: 1 addition & 0 deletions modules/beta-private-cluster-update-variant/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ locals {
gke_backup_agent_config = var.gke_backup_agent_config ? [{ enabled = true }] : [{ enabled = false }]
gcs_fuse_csi_driver_config = var.gcs_fuse_csi_driver ? [{ enabled = true }] : []
stateful_ha_config = var.stateful_ha ? [{ enabled = true }] : []
# One-element list when the Ray Operator addon is enabled, empty list otherwise
# (consumed by a dynamic block's for_each). `enabled` is a required attribute of
# the variable's object type, so it is read directly instead of via length()/lookup().
ray_operator_config = var.ray_operator_config.enabled ? [var.ray_operator_config] : []
cluster_cloudrun_config_load_balancer_config = (var.cloudrun && var.cloudrun_load_balancer_type != "") ? {
load_balancer_type = var.cloudrun_load_balancer_type
} : {}
Expand Down
14 changes: 14 additions & 0 deletions modules/beta-private-cluster-update-variant/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,20 @@ variable "stateful_ha" {
default = false
}

# Settings for the Ray Operator addon. `enabled` must be supplied whenever the
# object is set; the two optional flags default to false when omitted.
variable "ray_operator_config" {
  type = object({
    enabled            = bool
    logging_enabled    = optional(bool, false)
    monitoring_enabled = optional(bool, false)
  })
  description = "The Ray Operator Addon configuration for this cluster."
  default = {
    enabled            = false
    logging_enabled    = false
    monitoring_enabled = false
  }
  # An explicit null from the caller falls back to the default above instead
  # of reaching expressions that dereference this object.
  nullable = false
}

variable "timeouts" {
type = map(string)
description = "Timeout for cluster operations."
Expand Down
4 changes: 2 additions & 2 deletions modules/beta-private-cluster-update-variant/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ terraform {
required_providers {
google = {
source = "hashicorp/google"
version = ">= 5.33.0, < 6"
version = ">= 5.40.0, < 6"
}
google-beta = {
source = "hashicorp/google-beta"
version = ">= 5.33.0, < 6"
version = ">= 5.40.0, < 6"
}
kubernetes = {
source = "hashicorp/kubernetes"
Expand Down
1 change: 1 addition & 0 deletions modules/beta-private-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ Then perform the following commands on the root folder:
| notification\_filter\_event\_type | Choose what type of notifications you want to receive. If no filters are applied, you'll receive all notification types. Can be used to filter what notifications are sent. Accepted values are UPGRADE\_AVAILABLE\_EVENT, UPGRADE\_EVENT, and SECURITY\_BULLETIN\_EVENT. | `list(string)` | `[]` | no |
| private\_endpoint\_subnetwork | The subnetwork to use for the hosted master network. | `string` | `null` | no |
| project\_id | The project ID to host the cluster in (required) | `string` | n/a | yes |
| ray\_operator\_config | The Ray Operator Addon configuration for this cluster. | <pre>object({<br> enabled = bool<br> logging_enabled = optional(bool, false)<br> monitoring_enabled = optional(bool, false)<br> })</pre> | <pre>{<br> "enabled": false,<br> "logging_enabled": false,<br> "monitoring_enabled": false<br>}</pre> | no |
| region | The region to host the cluster in (optional if zonal cluster / required if regional) | `string` | `null` | no |
| regional | Whether is a regional cluster (zonal cluster if set false. WARNING: changing this after cluster creation is destructive!) | `bool` | `true` | no |
| registry\_project\_ids | Projects holding Google Container Registries. If empty, we use the cluster project. If a service account is created and the `grant_registry_access` variable is set to `true`, the `storage.objectViewer` and `artifactregistry.reader` roles are assigned on these projects. | `list(string)` | `[]` | no |
Expand Down
17 changes: 17 additions & 0 deletions modules/beta-private-cluster/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,23 @@ resource "google_container_cluster" "primary" {
enabled = stateful_ha_config.value.enabled
}
}

# Ray Operator addon: one block is rendered per element of
# local.ray_operator_config, and none at all when that list is empty.
dynamic "ray_operator_config" {
  for_each = local.ray_operator_config
  iterator = ray_addon

  content {
    enabled = ray_addon.value.enabled

    # Nested toggle driven by the object's logging_enabled attribute.
    ray_cluster_logging_config {
      enabled = ray_addon.value.logging_enabled
    }

    # Nested toggle driven by the object's monitoring_enabled attribute.
    ray_cluster_monitoring_config {
      enabled = ray_addon.value.monitoring_enabled
    }
  }
}

istio_config {
disabled = !var.istio
auth = var.istio_auth
Expand Down
1 change: 1 addition & 0 deletions modules/beta-private-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ locals {
gke_backup_agent_config = var.gke_backup_agent_config ? [{ enabled = true }] : [{ enabled = false }]
gcs_fuse_csi_driver_config = var.gcs_fuse_csi_driver ? [{ enabled = true }] : []
stateful_ha_config = var.stateful_ha ? [{ enabled = true }] : []
# One-element list when the Ray Operator addon is enabled, empty list otherwise
# (consumed by a dynamic block's for_each). `enabled` is a required attribute of
# the variable's object type, so it is read directly instead of via length()/lookup().
ray_operator_config = var.ray_operator_config.enabled ? [var.ray_operator_config] : []
cluster_cloudrun_config_load_balancer_config = (var.cloudrun && var.cloudrun_load_balancer_type != "") ? {
load_balancer_type = var.cloudrun_load_balancer_type
} : {}
Expand Down
Loading

0 comments on commit c046af1

Please sign in to comment.