Skip to content

Commit

Permalink
feat: promote tpu to ga (#1856)
Browse files Browse the repository at this point in the history
  • Loading branch information
DrFaust92 authored Jan 29, 2024
1 parent 322a5ee commit ba78819
Show file tree
Hide file tree
Showing 41 changed files with 192 additions and 63 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ Then perform the following commands on the root folder:
| enable\_network\_egress\_export | Whether to enable network egress metering for this cluster. If enabled, a daemonset will be created in the cluster to meter network egress traffic. | `bool` | `false` | no |
| enable\_resource\_consumption\_export | Whether to enable resource consumption metering on this cluster. When enabled, a table will be created in the resource export BigQuery dataset to store resource consumption data. The resulting table can be joined with the resource usage table or with BigQuery billing export. | `bool` | `true` | no |
| enable\_shielded\_nodes | Enable Shielded Nodes features on all nodes in this cluster | `bool` | `true` | no |
| enable\_tpu | Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no |
| enable\_vertical\_pod\_autoscaling | Vertical Pod Autoscaling automatically adjusts the resources of pods controlled by it | `bool` | `false` | no |
| filestore\_csi\_driver | The status of the Filestore CSI driver addon, which allows the usage of filestore instance as volumes | `bool` | `false` | no |
| firewall\_inbound\_ports | List of TCP ports for admission/webhook controllers. Either flag `add_master_webhook_firewall_rules` or `add_cluster_firewall_rules` (also adds egress rules) must be set to `true` for inbound-ports firewall rules to be applied. | `list(string)` | <pre>[<br> "8443",<br> "9443",<br> "15017"<br>]</pre> | no |
Expand Down Expand Up @@ -257,6 +258,7 @@ Then perform the following commands on the root folder:
| region | Cluster region |
| release\_channel | The release channel of this cluster |
| service\_account | The service account to default running nodes as if not overridden in `node_pools`. |
| tpu\_ipv4\_cidr\_block | The IP range in CIDR notation used for the TPUs |
| type | Cluster type (regional / zonal) |
| vertical\_pod\_autoscaling\_enabled | Whether vertical pod autoscaling is enabled |
| zones | List of zones in which the cluster resides |
Expand Down
3 changes: 1 addition & 2 deletions autogen/main/cluster.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,9 @@ resource "google_container_cluster" "primary" {
}

enable_kubernetes_alpha = var.enable_kubernetes_alpha

enable_tpu = var.enable_tpu
{% if beta_cluster %}
enable_intranode_visibility = var.enable_intranode_visibility
enable_tpu = var.enable_tpu

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
3 changes: 0 additions & 3 deletions autogen/main/firewall.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ resource "google_compute_firewall" "intra_egress" {
}


{% if beta_cluster %}
/******************************************
Allow egress to the TPU IPv4 CIDR block

Expand Down Expand Up @@ -95,8 +94,6 @@ resource "google_compute_firewall" "tpu_egress" {
{% endif %}
}


{% endif %}
/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions autogen/main/outputs.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ output "identity_namespace" {
]
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}

{% if autopilot_cluster != true %}
output "mesh_certificates_config" {
description = "Mesh certificates configuration"
Expand Down Expand Up @@ -228,9 +233,4 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
{% endif %}
3 changes: 1 addition & 2 deletions autogen/main/variables.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -600,13 +600,12 @@ variable "deletion_protection" {
default = true
}

{% if beta_cluster %}
# Boolean toggle for Cloud TPU support; defaults to false (TPUs off).
# Per the description below, changing it after cluster creation is destructive.
variable "enable_tpu" {
type = bool
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}
{% endif %}

{% if autopilot_cluster != true %}
variable "network_policy" {
type = bool
Expand Down
2 changes: 1 addition & 1 deletion cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ resource "google_container_cluster" "primary" {
}

enable_kubernetes_alpha = var.enable_kubernetes_alpha

enable_tpu = var.enable_tpu
dynamic "master_authorized_networks_config" {
for_each = local.master_authorized_networks_config
content {
Expand Down
35 changes: 35 additions & 0 deletions firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,41 @@ resource "google_compute_firewall" "intra_egress" {
}


/******************************************
  Allow egress to the TPU IPv4 CIDR block

  Declared as its own rule, apart from the
  intra_egress rule, because the destination
  range is an attribute exported by the
  google_container_cluster resource.
  https://github.com/terraform-google-modules/terraform-google-kubernetes-engine/issues/1124
 *****************************************/
resource "google_compute_firewall" "tpu_egress" {
  # Created only when cluster firewall rules are requested AND TPUs are enabled.
  count = var.add_cluster_firewall_rules && var.enable_tpu ? 1 : 0

  project = local.network_project_id
  network = var.network

  # The cluster name is truncated to at most 36 characters before the suffix is appended.
  name        = "gke-${substr(var.name, 0, min(36, length(var.name)))}-tpu-egress"
  description = "Managed by terraform gke module: Allow pods to communicate with TPUs"

  direction = "EGRESS"
  priority  = var.firewall_priority

  target_tags        = [local.cluster_network_tag]
  destination_ranges = [google_container_cluster.primary.tpu_ipv4_cidr_block]

  # Allow all possible protocols: one allow block per entry, no port restriction.
  dynamic "allow" {
    for_each = ["tcp", "udp", "icmp", "sctp", "esp", "ah"]
    content {
      protocol = allow.value
    }
  }

  depends_on = [
    google_container_cluster.primary,
  ]
}

/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
1 change: 0 additions & 1 deletion modules/beta-autopilot-private-cluster/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ resource "google_compute_firewall" "tpu_egress" {

}


/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions modules/beta-autopilot-private-cluster/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ output "identity_namespace" {
]
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}



output "master_ipv4_cidr_block" {
Expand Down Expand Up @@ -183,8 +188,3 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
1 change: 1 addition & 0 deletions modules/beta-autopilot-private-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,7 @@ variable "enable_tpu" {
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}

variable "database_encryption" {
description = "Application-layer Secrets Encryption settings. The object format is {state = string, key_name = string}. Valid values of state are: \"ENCRYPTED\"; \"DECRYPTED\". key_name is the name of a CloudKMS key."
type = list(object({ state = string, key_name = string }))
Expand Down
1 change: 0 additions & 1 deletion modules/beta-autopilot-public-cluster/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ resource "google_compute_firewall" "tpu_egress" {
]
}


/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions modules/beta-autopilot-public-cluster/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ output "identity_namespace" {
]
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}



output "cloudrun_enabled" {
Expand Down Expand Up @@ -173,8 +178,3 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
1 change: 1 addition & 0 deletions modules/beta-autopilot-public-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ variable "enable_tpu" {
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}

variable "database_encryption" {
description = "Application-layer Secrets Encryption settings. The object format is {state = string, key_name = string}. Valid values of state are: \"ENCRYPTED\"; \"DECRYPTED\". key_name is the name of a CloudKMS key."
type = list(object({ state = string, key_name = string }))
Expand Down
5 changes: 2 additions & 3 deletions modules/beta-private-cluster-update-variant/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,9 @@ resource "google_container_cluster" "primary" {
}
}

enable_kubernetes_alpha = var.enable_kubernetes_alpha

enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu
enable_intranode_visibility = var.enable_intranode_visibility

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
1 change: 0 additions & 1 deletion modules/beta-private-cluster-update-variant/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ resource "google_compute_firewall" "tpu_egress" {

}


/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions modules/beta-private-cluster-update-variant/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ output "identity_namespace" {
]
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}

output "mesh_certificates_config" {
description = "Mesh certificates configuration"
value = local.cluster_mesh_certificates_config
Expand Down Expand Up @@ -209,8 +214,3 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
1 change: 1 addition & 0 deletions modules/beta-private-cluster-update-variant/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ variable "enable_tpu" {
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}

variable "network_policy" {
type = bool
description = "Enable network policy addon"
Expand Down
5 changes: 2 additions & 3 deletions modules/beta-private-cluster/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,9 @@ resource "google_container_cluster" "primary" {
}
}

enable_kubernetes_alpha = var.enable_kubernetes_alpha

enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu
enable_intranode_visibility = var.enable_intranode_visibility

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
1 change: 0 additions & 1 deletion modules/beta-private-cluster/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ resource "google_compute_firewall" "tpu_egress" {

}


/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions modules/beta-private-cluster/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ output "identity_namespace" {
]
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}

output "mesh_certificates_config" {
description = "Mesh certificates configuration"
value = local.cluster_mesh_certificates_config
Expand Down Expand Up @@ -209,8 +214,3 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
1 change: 1 addition & 0 deletions modules/beta-private-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ variable "enable_tpu" {
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}

variable "network_policy" {
type = bool
description = "Enable network policy addon"
Expand Down
5 changes: 2 additions & 3 deletions modules/beta-public-cluster-update-variant/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,9 @@ resource "google_container_cluster" "primary" {
}
}

enable_kubernetes_alpha = var.enable_kubernetes_alpha

enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu
enable_intranode_visibility = var.enable_intranode_visibility

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
1 change: 0 additions & 1 deletion modules/beta-public-cluster-update-variant/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ resource "google_compute_firewall" "tpu_egress" {
]
}


/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions modules/beta-public-cluster-update-variant/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ output "identity_namespace" {
]
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}

output "mesh_certificates_config" {
description = "Mesh certificates configuration"
value = local.cluster_mesh_certificates_config
Expand Down Expand Up @@ -199,8 +204,3 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
1 change: 1 addition & 0 deletions modules/beta-public-cluster-update-variant/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,7 @@ variable "enable_tpu" {
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}

variable "network_policy" {
type = bool
description = "Enable network policy addon"
Expand Down
5 changes: 2 additions & 3 deletions modules/beta-public-cluster/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,9 @@ resource "google_container_cluster" "primary" {
}
}

enable_kubernetes_alpha = var.enable_kubernetes_alpha

enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu
enable_intranode_visibility = var.enable_intranode_visibility

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
1 change: 0 additions & 1 deletion modules/beta-public-cluster/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ resource "google_compute_firewall" "tpu_egress" {
]
}


/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions modules/beta-public-cluster/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ output "identity_namespace" {
]
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}

output "mesh_certificates_config" {
description = "Mesh certificates configuration"
value = local.cluster_mesh_certificates_config
Expand Down Expand Up @@ -199,8 +204,3 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# TPU CIDR range read from the cluster resource; evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
Loading

0 comments on commit ba78819

Please sign in to comment.