Skip to content

Commit 0f3d394

Browse files
committed
Merge branch 'refs/heads/main' into create-regional-replicated-disk
2 parents 23d36df + d9b2f9c commit 0f3d394

13 files changed

+822
-2
lines changed

tpu/create_tpu_spot.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from google.cloud.tpu_v2 import Node
17+
18+
19+
def create_tpu_with_spot(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
) -> Node:
    """Create a Cloud TPU node that uses preemptible scheduling.

    Builds a node description from the accelerator type and runtime version,
    marks its scheduling config as preemptible, submits the create request
    and blocks until the returned operation has a result.

    Args:
        project_id (str): The ID of the Google Cloud project.
        zone (str): The zone where the TPU node will be created.
        tpu_name (str): The ID to give the new TPU node.
        tpu_type (str, optional): The accelerator type of the node.
        runtime_version (str, optional): The runtime version for the node.

    Returns:
        Node: The result of the completed create operation.
    """
    # [START tpu_vm_create_spot]
    from google.cloud import tpu_v2

    # TODO (developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"

    # Describe the TPU node to create.
    # To see available runtime version use command:
    # gcloud compute tpus versions list --zone={ZONE}
    spot_node = tpu_v2.Node(
        accelerator_type=tpu_type,
        runtime_version=runtime_version,
        # TODO: Wait for update of library to change preemptible to spot=True
        scheduling_config=tpu_v2.SchedulingConfig(preemptible=True),
    )

    create_request = tpu_v2.CreateNodeRequest(
        parent=f"projects/{project_id}/locations/{zone}",
        node_id=tpu_name,
        node=spot_node,
    )

    tpu_client = tpu_v2.TpuClient()
    pending = tpu_client.create_node(request=create_request)
    print("Waiting for operation to complete...")

    # Block until the create operation has finished.
    response = pending.result()

    print(response.scheduling_config)
    # Example response:
    # TODO: Update the response to include the scheduling config

    # [END tpu_vm_create_spot]
    return response
74+
75+
76+
if __name__ == "__main__":
    # The project is read from the environment; zone and TPU name are
    # sample values.
    PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
    ZONE = "us-central1-b"
    create_tpu_with_spot(
        project_id=PROJECT_ID,
        zone=ZONE,
        tpu_name="tpu-with-spot",
    )

tpu/create_tpu_topology.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def create_cloud_tpu_with_topology(
4444
node = tpu_v2.Node()
4545
# Here we are creating a TPU v2-8 with 2x2 topology.
4646
node.accelerator_config = tpu_v2.AcceleratorConfig(
47-
type_=tpu_v2.AcceleratorConfig.Type.V3,
47+
type_=tpu_v2.AcceleratorConfig.Type.V2,
4848
topology="2x2",
4949
)
5050
node.runtime_version = runtime_version

tpu/delete_tpu.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,4 @@ def delete_cloud_tpu(project_id: str, zone: str, tpu_name: str = "tpu-name") ->
4545
if __name__ == "__main__":
4646
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
4747
ZONE = "us-central1-b"
48-
delete_cloud_tpu(PROJECT_ID, ZONE, "tpu-name12")
48+
delete_cloud_tpu(PROJECT_ID, ZONE, "tpu-name")

tpu/queued_resources_create.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node
17+
18+
19+
def create_queued_resource(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
) -> Node:
    """Request a TPU node through a queued resource and wait for the result.

    Wraps a single node spec in a queued resource, submits the create
    request, blocks until the returned operation has a result, then prints
    the result's name and state.

    Args:
        project_id (str): The ID of the Google Cloud project.
        zone (str): The zone used as the location of the request and node.
        tpu_name (str): The node ID placed in the node spec.
        tpu_type (str, optional): The accelerator type of the node.
        runtime_version (str, optional): The runtime version for the node.
        queued_resource_name (str, optional): The ID of the queued resource.

    Returns:
        The result of the completed create operation.
        NOTE(review): annotated as Node, but the fields read from it
        (`name`, `state.state`) suggest a QueuedResource - confirm and
        update the annotation.
    """
    # [START tpu_queued_resources_create]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"

    # The same location is the parent of both the node and the request.
    location = f"projects/{project_id}/locations/{zone}"

    # To see available runtime version use command:
    # gcloud compute tpus versions list --zone={ZONE}
    tpu_node = tpu_v2alpha1.Node(
        accelerator_type=tpu_type,
        runtime_version=runtime_version,
    )

    spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec(
        parent=location,
        node_id=tpu_name,
        node=tpu_node,
    )

    queued = tpu_v2alpha1.QueuedResource(
        tpu=tpu_v2alpha1.QueuedResource.Tpu(node_spec=[spec]),
    )

    request = CreateQueuedResourceRequest(
        parent=location,
        queued_resource_id=queued_resource_name,
        queued_resource=queued,
    )

    client = tpu_v2alpha1.TpuClient()
    # Block until the create operation has finished.
    response = client.create_queued_resource(request=request).result()

    print(response.name)
    print(response.state.state)
    # Example response:
    # projects/[project_id]/locations/[zone]/queuedResources/resource-name
    # State.WAITING_FOR_RESOURCES

    # [END tpu_queued_resources_create]
    return response
70+
71+
72+
if __name__ == "__main__":
    # The project is read from the environment; the remaining values are
    # sample names.
    PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
    ZONE = "us-central1-b"
    create_queued_resource(
        PROJECT_ID,
        ZONE,
        "tpu-name",
        queued_resource_name="resource-name",
    )
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node
17+
18+
19+
def create_queued_resource_network(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
    network: str = "default",
) -> Node:
    """Request a TPU node with an explicit network config via a queued resource.

    Builds a node whose network config uses the given network, wraps it in a
    queued resource, submits the create request, blocks until the returned
    operation has a result, then prints the result's name and the network
    config of its first node spec.

    Args:
        project_id (str): The ID of the Google Cloud project.
        zone (str): The zone used as the location of the request and node.
        tpu_name (str): The node ID placed in the node spec.
        tpu_type (str, optional): The accelerator type of the node.
        runtime_version (str, optional): The runtime version for the node.
        queued_resource_name (str, optional): The ID of the queued resource.
        network (str, optional): The network set in the node's network
            config. The subnetwork is fixed to "default".

    Returns:
        The result of the completed create operation.
        NOTE(review): annotated as Node, but the fields read from it
        (`name`, `tpu.node_spec`) suggest a QueuedResource - confirm and
        update the annotation.
    """
    # [START tpu_queued_resources_network]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"
    # network = "default"

    node = tpu_v2alpha1.Node()
    node.accelerator_type = tpu_type
    node.runtime_version = runtime_version
    # Setting network configuration
    node.network_config = tpu_v2alpha1.NetworkConfig(
        network=network,  # Update if you want to use a specific network
        subnetwork="default",  # Update if you want to use a specific subnetwork
        enable_external_ips=True,
        can_ip_forward=True,
    )

    node_spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec()
    node_spec.parent = f"projects/{project_id}/locations/{zone}"
    node_spec.node_id = tpu_name
    node_spec.node = node

    resource = tpu_v2alpha1.QueuedResource()
    resource.tpu = tpu_v2alpha1.QueuedResource.Tpu(node_spec=[node_spec])

    request = CreateQueuedResourceRequest(
        parent=f"projects/{project_id}/locations/{zone}",
        queued_resource_id=queued_resource_name,
        queued_resource=resource,
    )

    client = tpu_v2alpha1.TpuClient()
    operation = client.create_queued_resource(request=request)

    # Block until the create operation has finished.
    response = operation.result()
    print(response.name)
    # Report the network config from the service's response, not from the
    # locally built request.
    print(response.tpu.node_spec[0].node.network_config)
    # Example response:
    # network: "default"
    # subnetwork: "default"
    # enable_external_ips: true
    # can_ip_forward: true

    # [END tpu_queued_resources_network]
    return response
80+
81+
82+
if __name__ == "__main__":
    # The project is read from the environment; the remaining values are
    # sample names.
    PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
    ZONE = "us-central1-b"
    create_queued_resource_network(
        PROJECT_ID,
        ZONE,
        "tpu-name",
        queued_resource_name="resource-name",
    )
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node
17+
18+
19+
def create_queued_resource_spot(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
) -> Node:
    """Request a spot TPU node through a queued resource and wait for it.

    Wraps a single node spec in a queued resource, sets the resource's
    `spot` field, submits the create request, blocks until the returned
    operation has a result, then prints the result's name and state.

    Args:
        project_id (str): The ID of the Google Cloud project.
        zone (str): The zone used as the location of the request and node.
        tpu_name (str): The node ID placed in the node spec.
        tpu_type (str, optional): The accelerator type of the node.
        runtime_version (str, optional): The runtime version for the node.
        queued_resource_name (str, optional): The ID of the queued resource.

    Returns:
        The result of the completed create operation.
        NOTE(review): annotated as Node, but the fields read from it
        (`name`, `state.state`) suggest a QueuedResource - confirm and
        update the annotation.
    """
    # [START tpu_queued_resources_create_spot]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"

    # The same location is the parent of both the node and the request.
    location = f"projects/{project_id}/locations/{zone}"

    # To see available runtime version use command:
    # gcloud compute tpus versions list --zone={ZONE}
    tpu_node = tpu_v2alpha1.Node(
        accelerator_type=tpu_type,
        runtime_version=runtime_version,
    )

    spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec(
        parent=location,
        node_id=tpu_name,
        node=tpu_node,
    )

    queued = tpu_v2alpha1.QueuedResource(
        tpu=tpu_v2alpha1.QueuedResource.Tpu(node_spec=[spec]),
    )
    # Create a spot resource
    queued.spot = tpu_v2alpha1.QueuedResource.Spot()

    request = CreateQueuedResourceRequest(
        parent=location,
        queued_resource_id=queued_resource_name,
        queued_resource=queued,
    )

    client = tpu_v2alpha1.TpuClient()
    # Block until the create operation has finished.
    response = client.create_queued_resource(request=request).result()

    print(response.name)
    print(response.state.state)
    # Example response:
    # projects/[project_id]/locations/[zone]/queuedResources/resource-name
    # State.WAITING_FOR_RESOURCES

    # [END tpu_queued_resources_create_spot]
    return response
72+
73+
74+
if __name__ == "__main__":
    # The project is read from the environment; the remaining values are
    # sample names.
    PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
    ZONE = "us-central1-b"
    create_queued_resource_spot(
        PROJECT_ID,
        ZONE,
        "tpu-name",
        queued_resource_name="resource-name",
    )

0 commit comments

Comments
 (0)