Google Vertex AI Overview
Google Vertex AI is a unified ML platform that provides:
- Gemini Models - Google's most capable AI models
- Model Garden - Pre-trained models for various tasks
- Custom Training - Train your own models
- MLOps - End-to-end ML lifecycle management
Getting Started
Prerequisites
- A Google Cloud project with billing enabled
- A recent Python 3 environment for the SDK examples
- The gcloud CLI (installed in the next step)
Initial Setup
# Install gcloud CLI
curl https://sdk.cloud.google.com | bash
# Initialize and authenticate
gcloud init
gcloud auth application-default login
# Enable Vertex AI API
gcloud services enable aiplatform.googleapis.com
# Set project
gcloud config set project YOUR_PROJECT_ID
Install SDK
pip install google-cloud-aiplatform
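To verify the install, you can import the SDK and print its version (the `vertexai` namespace ships inside the `google-cloud-aiplatform` package):

    # Quick sanity check after installation
    from google.cloud import aiplatform

    print(aiplatform.__version__)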
Using Gemini Models
Basic Generation
import vertexai
from vertexai.generative_models import GenerativeModel
# Initialize
vertexai.init(project="your-project-id", location="us-central1")
# Load model
model = GenerativeModel("gemini-1.5-pro")
# Generate content
response = model.generate_content("Explain quantum computing")
print(response.text)
Chat Conversations
from vertexai.generative_models import GenerativeModel
model = GenerativeModel("gemini-1.5-pro")
chat = model.start_chat()
# Multi-turn conversation
response1 = chat.send_message("What is machine learning?")
print(response1.text)
response2 = chat.send_message("How does it differ from traditional programming?")
print(response2.text)
# Access chat history
for message in chat.history:
    print(f"{message.role}: {message.parts[0].text[:100]}...")
Streaming Responses
model = GenerativeModel("gemini-1.5-pro")
responses = model.generate_content(
    "Write a detailed explanation of neural networks",
    stream=True
)
for response in responses:
    print(response.text, end="")
Multimodal Input (Images)
from vertexai.generative_models import GenerativeModel, Part
model = GenerativeModel("gemini-1.5-pro")
# From a local file (Part.from_data takes raw bytes plus a MIME type)
image = Part.from_data(
    data=open("image.jpg", "rb").read(),
    mime_type="image/jpeg"
)
# From a Cloud Storage URI
# image = Part.from_uri("gs://bucket/image.jpg", mime_type="image/jpeg")
response = model.generate_content([
    "Describe what you see in this image:",
    image
])
print(response.text)
System Instructions
model = GenerativeModel(
    "gemini-1.5-pro",
    system_instruction="You are a helpful coding assistant. Be concise and provide code examples."
)
response = model.generate_content("How do I read a file in Python?")
print(response.text)
Available Models
| Model | Use Case | Context Window |
|-------|----------|----------------|
| gemini-1.5-pro | Complex tasks, long context | 1M tokens |
| gemini-1.5-flash | Fast responses, efficiency | 1M tokens |
| gemini-1.0-pro | Balanced performance | 32K tokens |
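Switching models only requires passing a different model ID. For example, a latency-sensitive workload might use Flash instead of Pro (a minimal sketch; the prompt is illustrative):

    from vertexai.generative_models import GenerativeModel

    # Flash trades some capability for lower latency and cost
    flash_model = GenerativeModel("gemini-1.5-flash")
    response = flash_model.generate_content("Summarize quantum computing in one sentence.")
    print(response.text)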
Function Calling
Define Functions
from vertexai.generative_models import GenerativeModel, Tool, FunctionDeclaration
# Define function schema
get_weather = FunctionDeclaration(
    name="get_weather",
    description="Get the current weather for a location",
    parameters={
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "City and state/country"
            },
            "unit": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "Temperature unit"
            }
        },
        "required": ["location"]
    }
)
# Create tool
weather_tool = Tool(function_declarations=[get_weather])
# Use with model
model = GenerativeModel(
    "gemini-1.5-pro",
    tools=[weather_tool]
)
Handle Function Calls
response = model.generate_content("What's the weather in Tokyo?")
# Check for function call
if response.candidates[0].content.parts[0].function_call:
    function_call = response.candidates[0].content.parts[0].function_call
    # Execute the function (get_actual_weather is your own implementation)
    if function_call.name == "get_weather":
        result = get_actual_weather(function_call.args["location"])
        # Send the result back to the model
        response = model.generate_content([
            "What's the weather in Tokyo?",
            response.candidates[0].content,
            Part.from_function_response(
                name="get_weather",
                response={"weather": result}
            )
        ])
        print(response.text)
Embeddings
Generate Embeddings
from vertexai.language_models import TextEmbeddingModel
model = TextEmbeddingModel.from_pretrained("text-embedding-004")
texts = [
    "What is machine learning?",
    "How do neural networks work?",
    "Best pizza in New York"
]
embeddings = model.get_embeddings(texts)
for text, embedding in zip(texts, embeddings):
    print(f"Text: {text[:50]}...")
    print(f"Embedding dimensions: {len(embedding.values)}")
    print(f"First 5 values: {embedding.values[:5]}")
Semantic Search
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
# Generate embeddings for documents
documents = [
    "Machine learning is a subset of AI",
    "Deep learning uses neural networks",
    "Python is a programming language",
    "TensorFlow is an ML framework"
]
doc_embeddings = model.get_embeddings(documents)
doc_vectors = np.array([e.values for e in doc_embeddings])
# Search query
query = "What is deep learning?"
query_embedding = model.get_embeddings([query])[0]
query_vector = np.array(query_embedding.values).reshape(1, -1)
# Find most similar
similarities = cosine_similarity(query_vector, doc_vectors)[0]
top_indices = np.argsort(similarities)[::-1][:3]
print("Top matches:")
for idx in top_indices:
    print(f"  {documents[idx]} (score: {similarities[idx]:.3f})")
Vertex AI Search
For production RAG applications, Vertex AI Search provides a managed retrieval backend:
from google.cloud import discoveryengine
# Create search client
client = discoveryengine.SearchServiceClient()
# Search
# project, location, and datastore are your own resource IDs
request = discoveryengine.SearchRequest(
    serving_config=f"projects/{project}/locations/{location}/collections/default_collection/dataStores/{datastore}/servingConfigs/default_config",
    query="How to train a model?",
    page_size=10
)
response = client.search(request)
for result in response.results:
    print(f"Document: {result.document.name}")
    print(f"Snippet: {result.document.derived_struct_data.get('snippet', '')}")
Best Practices
Safety Settings
from vertexai.generative_models import GenerativeModel, HarmCategory, HarmBlockThreshold
model = GenerativeModel(
    "gemini-1.5-pro",
    safety_settings={
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    }
)
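When a candidate is blocked by these filters, reading `response.text` raises an error. A defensive pattern (a sketch, relying on the candidate's `finish_reason` field) is to inspect the candidate before reading it:

    from vertexai.generative_models import FinishReason

    response = model.generate_content("Some borderline prompt")

    # Prompt-level blocks can leave candidates empty; candidate-level blocks
    # set finish_reason to SAFETY
    if not response.candidates or response.candidates[0].finish_reason == FinishReason.SAFETY:
        print("Response was blocked by safety filters")
    else:
        print(response.text)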
Generation Config
from vertexai.generative_models import GenerationConfig
config = GenerationConfig(
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    max_output_tokens=2048,
    stop_sequences=["END"]
)
response = model.generate_content(
    "Write a story",
    generation_config=config
)
Error Handling
from google.api_core import exceptions
try:
    response = model.generate_content(prompt)
except exceptions.ResourceExhausted:
    print("Rate limited - implement backoff")
except exceptions.InvalidArgument as e:
    print(f"Invalid request: {e}")
except exceptions.GoogleAPIError as e:
    print(f"API error: {e}")
Cost Management
# Track token usage
response = model.generate_content(prompt)
# Access usage metadata
usage = response.usage_metadata
print(f"Prompt tokens: {usage.prompt_token_count}")
print(f"Response tokens: {usage.candidates_token_count}")
print(f"Total tokens: {usage.total_token_count}")
Integration with Agent Frameworks
For OpenClaw/Clawdbot
Configure Google as a provider:
providers:
  google:
    project: "your-project-id"
    location: "us-central1"
    model: "gemini-1.5-pro"
REST API Direct
curl -X POST \
  "https://us-central1-aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/us-central1/publishers/google/models/gemini-1.5-pro:generateContent" \
  -H "Authorization: Bearer $(gcloud auth print-access-token)" \
  -H "Content-Type: application/json" \
  -d '{
    "contents": [{
      "role": "user",
      "parts": [{"text": "Hello, Gemini!"}]
    }]
  }'
Conclusion
Google Vertex AI provides a comprehensive platform for AI development. With Gemini models offering massive context windows and multimodal capabilities, it's a powerful option for building AI applications.
Next: Building RAG Applications - Retrieval-augmented generation