Skip to content

Commit ba78819

Browse files
authored
feat: promote tpu to ga (#1856)
1 parent: 322a5ee; commit: ba78819

File tree

41 files changed

+192
-63
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+192
-63
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ Then perform the following commands on the root folder:
162162
| enable\_network\_egress\_export | Whether to enable network egress metering for this cluster. If enabled, a daemonset will be created in the cluster to meter network egress traffic. | `bool` | `false` | no |
163163
| enable\_resource\_consumption\_export | Whether to enable resource consumption metering on this cluster. When enabled, a table will be created in the resource export BigQuery dataset to store resource consumption data. The resulting table can be joined with the resource usage table or with BigQuery billing export. | `bool` | `true` | no |
164164
| enable\_shielded\_nodes | Enable Shielded Nodes features on all nodes in this cluster | `bool` | `true` | no |
165+
| enable\_tpu | Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no |
165166
| enable\_vertical\_pod\_autoscaling | Vertical Pod Autoscaling automatically adjusts the resources of pods controlled by it | `bool` | `false` | no |
166167
| filestore\_csi\_driver | The status of the Filestore CSI driver addon, which allows the usage of filestore instance as volumes | `bool` | `false` | no |
167168
| firewall\_inbound\_ports | List of TCP ports for admission/webhook controllers. Either flag `add_master_webhook_firewall_rules` or `add_cluster_firewall_rules` (also adds egress rules) must be set to `true` for inbound-ports firewall rules to be applied. | `list(string)` | <pre>[<br> "8443",<br> "9443",<br> "15017"<br>]</pre> | no |
@@ -257,6 +258,7 @@ Then perform the following commands on the root folder:
257258
| region | Cluster region |
258259
| release\_channel | The release channel of this cluster |
259260
| service\_account | The service account to default running nodes as if not overridden in `node_pools`. |
261+
| tpu\_ipv4\_cidr\_block | The IP range in CIDR notation used for the TPUs |
260262
| type | Cluster type (regional / zonal) |
261263
| vertical\_pod\_autoscaling\_enabled | Whether vertical pod autoscaling enabled |
262264
| zones | List of zones in which the cluster resides |

autogen/main/cluster.tf.tmpl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,10 +189,9 @@ resource "google_container_cluster" "primary" {
189189
}
190190

191191
enable_kubernetes_alpha = var.enable_kubernetes_alpha
192-
192+
enable_tpu = var.enable_tpu
193193
{% if beta_cluster %}
194194
enable_intranode_visibility = var.enable_intranode_visibility
195-
enable_tpu = var.enable_tpu
196195

197196
dynamic "pod_security_policy_config" {
198197
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []

autogen/main/firewall.tf.tmpl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ resource "google_compute_firewall" "intra_egress" {
5757
}
5858

5959

60-
{% if beta_cluster %}
6160
/******************************************
6261
Allow egress to the TPU IPv4 CIDR block
6362

@@ -95,8 +94,6 @@ resource "google_compute_firewall" "tpu_egress" {
9594
{% endif %}
9695
}
9796

98-
99-
{% endif %}
10097
/******************************************
10198
Allow GKE master to hit non 443 ports for
10299
Webhooks/Admission Controllers

autogen/main/outputs.tf.tmpl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,11 @@ output "identity_namespace" {
171171
]
172172
}
173173

174+
output "tpu_ipv4_cidr_block" {
175+
description = "The IP range in CIDR notation used for the TPUs"
176+
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
177+
}
178+
174179
{% if autopilot_cluster != true %}
175180
output "mesh_certificates_config" {
176181
description = "Mesh certificates configuration"
@@ -228,9 +233,4 @@ output "identity_service_enabled" {
228233
description = "Whether Identity Service is enabled"
229234
value = local.cluster_pod_security_policy_enabled
230235
}
231-
232-
output "tpu_ipv4_cidr_block" {
233-
description = "The IP range in CIDR notation used for the TPUs"
234-
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
235-
}
236236
{% endif %}

autogen/main/variables.tf.tmpl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -600,13 +600,12 @@ variable "deletion_protection" {
600600
default = true
601601
}
602602

603-
{% if beta_cluster %}
604603
variable "enable_tpu" {
605604
type = bool
606605
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
607606
default = false
608607
}
609-
{% endif %}
608+
610609
{% if autopilot_cluster != true %}
611610
variable "network_policy" {
612611
type = bool

cluster.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ resource "google_container_cluster" "primary" {
146146
}
147147

148148
enable_kubernetes_alpha = var.enable_kubernetes_alpha
149-
149+
enable_tpu = var.enable_tpu
150150
dynamic "master_authorized_networks_config" {
151151
for_each = local.master_authorized_networks_config
152152
content {

firewall.tf

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,41 @@ resource "google_compute_firewall" "intra_egress" {
5555
}
5656

5757

58+
/******************************************
59+
Allow egress to the TPU IPv4 CIDR block
60+
61+
This rule is defined separately from the
62+
intra_egress rule above since it requires
63+
an output from the google_container_cluster
64+
resource.
65+
66+
https://github.com/terraform-google-modules/terraform-google-kubernetes-engine/issues/1124
67+
*****************************************/
68+
resource "google_compute_firewall" "tpu_egress" {
69+
count = var.add_cluster_firewall_rules && var.enable_tpu ? 1 : 0
70+
name = "gke-${substr(var.name, 0, min(36, length(var.name)))}-tpu-egress"
71+
description = "Managed by terraform gke module: Allow pods to communicate with TPUs"
72+
project = local.network_project_id
73+
network = var.network
74+
priority = var.firewall_priority
75+
direction = "EGRESS"
76+
77+
target_tags = [local.cluster_network_tag]
78+
destination_ranges = [google_container_cluster.primary.tpu_ipv4_cidr_block]
79+
80+
# Allow all possible protocols
81+
allow { protocol = "tcp" }
82+
allow { protocol = "udp" }
83+
allow { protocol = "icmp" }
84+
allow { protocol = "sctp" }
85+
allow { protocol = "esp" }
86+
allow { protocol = "ah" }
87+
88+
depends_on = [
89+
google_container_cluster.primary,
90+
]
91+
}
92+
5893
/******************************************
5994
Allow GKE master to hit non 443 ports for
6095
Webhooks/Admission Controllers

modules/beta-autopilot-private-cluster/firewall.tf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ resource "google_compute_firewall" "tpu_egress" {
8484

8585
}
8686

87-
8887
/******************************************
8988
Allow GKE master to hit non 443 ports for
9089
Webhooks/Admission Controllers

modules/beta-autopilot-private-cluster/outputs.tf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ output "identity_namespace" {
142142
]
143143
}
144144

145+
output "tpu_ipv4_cidr_block" {
146+
description = "The IP range in CIDR notation used for the TPUs"
147+
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
148+
}
149+
145150

146151

147152
output "master_ipv4_cidr_block" {
@@ -183,8 +188,3 @@ output "identity_service_enabled" {
183188
description = "Whether Identity Service is enabled"
184189
value = local.cluster_pod_security_policy_enabled
185190
}
186-
187-
output "tpu_ipv4_cidr_block" {
188-
description = "The IP range in CIDR notation used for the TPUs"
189-
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
190-
}

modules/beta-autopilot-private-cluster/variables.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,7 @@ variable "enable_tpu" {
433433
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
434434
default = false
435435
}
436+
436437
variable "database_encryption" {
437438
description = "Application-layer Secrets Encryption settings. The object format is {state = string, key_name = string}. Valid values of state are: \"ENCRYPTED\"; \"DECRYPTED\". key_name is the name of a CloudKMS key."
438439
type = list(object({ state = string, key_name = string }))

modules/beta-autopilot-public-cluster/firewall.tf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ resource "google_compute_firewall" "tpu_egress" {
9090
]
9191
}
9292

93-
9493
/******************************************
9594
Allow GKE master to hit non 443 ports for
9695
Webhooks/Admission Controllers

modules/beta-autopilot-public-cluster/outputs.tf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ output "identity_namespace" {
142142
]
143143
}
144144

145+
output "tpu_ipv4_cidr_block" {
146+
description = "The IP range in CIDR notation used for the TPUs"
147+
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
148+
}
149+
145150

146151

147152
output "cloudrun_enabled" {
@@ -173,8 +178,3 @@ output "identity_service_enabled" {
173178
description = "Whether Identity Service is enabled"
174179
value = local.cluster_pod_security_policy_enabled
175180
}
176-
177-
output "tpu_ipv4_cidr_block" {
178-
description = "The IP range in CIDR notation used for the TPUs"
179-
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
180-
}

modules/beta-autopilot-public-cluster/variables.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,7 @@ variable "enable_tpu" {
403403
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
404404
default = false
405405
}
406+
406407
variable "database_encryption" {
407408
description = "Application-layer Secrets Encryption settings. The object format is {state = string, key_name = string}. Valid values of state are: \"ENCRYPTED\"; \"DECRYPTED\". key_name is the name of a CloudKMS key."
408409
type = list(object({ state = string, key_name = string }))

modules/beta-private-cluster-update-variant/cluster.tf

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,9 @@ resource "google_container_cluster" "primary" {
152152
}
153153
}
154154

155-
enable_kubernetes_alpha = var.enable_kubernetes_alpha
156-
157-
enable_intranode_visibility = var.enable_intranode_visibility
155+
enable_kubernetes_alpha = var.enable_kubernetes_alpha
158156
enable_tpu = var.enable_tpu
157+
enable_intranode_visibility = var.enable_intranode_visibility
159158

160159
dynamic "pod_security_policy_config" {
161160
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []

modules/beta-private-cluster-update-variant/firewall.tf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ resource "google_compute_firewall" "tpu_egress" {
8484

8585
}
8686

87-
8887
/******************************************
8988
Allow GKE master to hit non 443 ports for
9089
Webhooks/Admission Controllers

modules/beta-private-cluster-update-variant/outputs.tf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,11 @@ output "identity_namespace" {
161161
]
162162
}
163163

164+
output "tpu_ipv4_cidr_block" {
165+
description = "The IP range in CIDR notation used for the TPUs"
166+
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
167+
}
168+
164169
output "mesh_certificates_config" {
165170
description = "Mesh certificates configuration"
166171
value = local.cluster_mesh_certificates_config
@@ -209,8 +214,3 @@ output "identity_service_enabled" {
209214
description = "Whether Identity Service is enabled"
210215
value = local.cluster_pod_security_policy_enabled
211216
}
212-
213-
output "tpu_ipv4_cidr_block" {
214-
description = "The IP range in CIDR notation used for the TPUs"
215-
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
216-
}

modules/beta-private-cluster-update-variant/variables.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ variable "enable_tpu" {
578578
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
579579
default = false
580580
}
581+
581582
variable "network_policy" {
582583
type = bool
583584
description = "Enable network policy addon"

modules/beta-private-cluster/cluster.tf

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,9 @@ resource "google_container_cluster" "primary" {
152152
}
153153
}
154154

155-
enable_kubernetes_alpha = var.enable_kubernetes_alpha
156-
157-
enable_intranode_visibility = var.enable_intranode_visibility
155+
enable_kubernetes_alpha = var.enable_kubernetes_alpha
158156
enable_tpu = var.enable_tpu
157+
enable_intranode_visibility = var.enable_intranode_visibility
159158

160159
dynamic "pod_security_policy_config" {
161160
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []

modules/beta-private-cluster/firewall.tf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ resource "google_compute_firewall" "tpu_egress" {
8484

8585
}
8686

87-
8887
/******************************************
8988
Allow GKE master to hit non 443 ports for
9089
Webhooks/Admission Controllers

modules/beta-private-cluster/outputs.tf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,11 @@ output "identity_namespace" {
161161
]
162162
}
163163

164+
output "tpu_ipv4_cidr_block" {
165+
description = "The IP range in CIDR notation used for the TPUs"
166+
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
167+
}
168+
164169
output "mesh_certificates_config" {
165170
description = "Mesh certificates configuration"
166171
value = local.cluster_mesh_certificates_config
@@ -209,8 +214,3 @@ output "identity_service_enabled" {
209214
description = "Whether Identity Service is enabled"
210215
value = local.cluster_pod_security_policy_enabled
211216
}
212-
213-
output "tpu_ipv4_cidr_block" {
214-
description = "The IP range in CIDR notation used for the TPUs"
215-
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
216-
}

modules/beta-private-cluster/variables.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ variable "enable_tpu" {
578578
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
579579
default = false
580580
}
581+
581582
variable "network_policy" {
582583
type = bool
583584
description = "Enable network policy addon"

modules/beta-public-cluster-update-variant/cluster.tf

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,9 @@ resource "google_container_cluster" "primary" {
152152
}
153153
}
154154

155-
enable_kubernetes_alpha = var.enable_kubernetes_alpha
156-
157-
enable_intranode_visibility = var.enable_intranode_visibility
155+
enable_kubernetes_alpha = var.enable_kubernetes_alpha
158156
enable_tpu = var.enable_tpu
157+
enable_intranode_visibility = var.enable_intranode_visibility
159158

160159
dynamic "pod_security_policy_config" {
161160
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []

modules/beta-public-cluster-update-variant/firewall.tf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ resource "google_compute_firewall" "tpu_egress" {
9090
]
9191
}
9292

93-
9493
/******************************************
9594
Allow GKE master to hit non 443 ports for
9695
Webhooks/Admission Controllers

modules/beta-public-cluster-update-variant/outputs.tf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,11 @@ output "identity_namespace" {
161161
]
162162
}
163163

164+
output "tpu_ipv4_cidr_block" {
165+
description = "The IP range in CIDR notation used for the TPUs"
166+
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
167+
}
168+
164169
output "mesh_certificates_config" {
165170
description = "Mesh certificates configuration"
166171
value = local.cluster_mesh_certificates_config
@@ -199,8 +204,3 @@ output "identity_service_enabled" {
199204
description = "Whether Identity Service is enabled"
200205
value = local.cluster_pod_security_policy_enabled
201206
}
202-
203-
output "tpu_ipv4_cidr_block" {
204-
description = "The IP range in CIDR notation used for the TPUs"
205-
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
206-
}

modules/beta-public-cluster-update-variant/variables.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,7 @@ variable "enable_tpu" {
548548
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
549549
default = false
550550
}
551+
551552
variable "network_policy" {
552553
type = bool
553554
description = "Enable network policy addon"

modules/beta-public-cluster/cluster.tf

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,9 @@ resource "google_container_cluster" "primary" {
152152
}
153153
}
154154

155-
enable_kubernetes_alpha = var.enable_kubernetes_alpha
156-
157-
enable_intranode_visibility = var.enable_intranode_visibility
155+
enable_kubernetes_alpha = var.enable_kubernetes_alpha
158156
enable_tpu = var.enable_tpu
157+
enable_intranode_visibility = var.enable_intranode_visibility
159158

160159
dynamic "pod_security_policy_config" {
161160
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []

modules/beta-public-cluster/firewall.tf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ resource "google_compute_firewall" "tpu_egress" {
9090
]
9191
}
9292

93-
9493
/******************************************
9594
Allow GKE master to hit non 443 ports for
9695
Webhooks/Admission Controllers

modules/beta-public-cluster/outputs.tf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,11 @@ output "identity_namespace" {
161161
]
162162
}
163163

164+
output "tpu_ipv4_cidr_block" {
165+
description = "The IP range in CIDR notation used for the TPUs"
166+
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
167+
}
168+
164169
output "mesh_certificates_config" {
165170
description = "Mesh certificates configuration"
166171
value = local.cluster_mesh_certificates_config
@@ -199,8 +204,3 @@ output "identity_service_enabled" {
199204
description = "Whether Identity Service is enabled"
200205
value = local.cluster_pod_security_policy_enabled
201206
}
202-
203-
output "tpu_ipv4_cidr_block" {
204-
description = "The IP range in CIDR notation used for the TPUs"
205-
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
206-
}

0 commit comments

Comments
 (0)