Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/gcloud-mcp/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import pkg from '../package.json' with { type: 'json' };
import { createRunGcloudCommand } from './tools/run_gcloud_command.js';
import { createResearchGcloudCommand } from './tools/research_gcloud_command.js';
import * as gcloud from './gcloud.js';
import yargs, { ArgumentsCamelCase, CommandModule } from 'yargs';
import { hideBin } from 'yargs/helpers';
Expand Down Expand Up @@ -114,6 +115,7 @@ const main = async () => {
);
const acl = createAccessControlList(config.allow, [...default_deny, ...(config.deny ?? [])]);
createRunGcloudCommand(acl).register(server);
createResearchGcloudCommand().register(server);
await server.connect(new StdioServerTransport());
log.info('🚀 gcloud mcp server started');

Expand Down
166 changes: 166 additions & 0 deletions packages/gcloud-mcp/src/tools/research_gcloud_command.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { Mock, beforeEach, describe, expect, test, vi } from 'vitest';
import * as gcloud from '../gcloud.js';
import { createResearchGcloudCommand } from './research_gcloud_command.js';

vi.mock('../gcloud.js');

// Stand-in MCP server: `registerTool` is a spy so tests can capture what the tool factory registers.
const registerToolSpy = vi.fn();
const mockServer = { registerTool: registerToolSpy } as unknown as McpServer;

/**
 * Asserts that exactly one tool was registered on the mock server and returns
 * the third argument of that registration call (the tool's handler).
 */
const getToolImplementation = () => {
  const registerToolMock = mockServer.registerTool as Mock;
  expect(registerToolMock).toHaveBeenCalledOnce();
  const [firstRegistration] = registerToolMock.mock.calls;
  return firstRegistration![2];
};

/** Registers the research tool on the mock server and hands back its handler. */
const createTool = () => {
  const researchTool = createResearchGcloudCommand();
  researchTool.register(mockServer);
  return getToolImplementation();
};

describe('createResearchGcloudCommand', () => {
  // Flags the tool appends to its two gcloud invocations; used to route stubbed responses.
  const HELP_FLAG = '--document=style=markdown';
  const GLOBAL_FLAGS_FLAG = '--format=markdown(global_flags)';

  type InvokeResult = Awaited<ReturnType<typeof gcloud.invoke>>;

  // Builds a fresh command path for each test so no array instance is shared between cases.
  const commandPath = () => ['compute', 'instances', 'list'];

  // Decodes the JSON payload carried in the first text item of a tool result.
  const payloadOf = (toolResult: { content: [{ text: string }] }) =>
    JSON.parse(toolResult.content[0].text);

  // Stubs `gcloud.invoke`, choosing the response by which marker flag the call carries.
  const routeInvoke = (responses: {
    help: InvokeResult;
    globalFlags: InvokeResult;
    other: InvokeResult;
  }) =>
    vi.mocked(gcloud.invoke).mockImplementation(async (args) => {
      if (args.includes(HELP_FLAG)) {
        return responses.help;
      }
      return args.includes(GLOBAL_FLAGS_FLAG) ? responses.globalFlags : responses.other;
    });

  beforeEach(() => {
    vi.clearAllMocks();
  });

  test('returns help and global flags on success', async () => {
    const tool = createTool();
    const helpStdout = '# Help Output';

    // The heading sits at index 1 (0-based). The tool should keep only the ten
    // lines that follow it, so the eleventh flag line must not appear.
    const keptFlags = Array.from({ length: 10 }, (_, i) => `flag${i + 1}`);
    const globalStdout = [
      'PREAMBLE',
      'GLOBAL FLAGS',
      ...keptFlags,
      'flag11-should-be-excluded',
    ].join('\n');

    routeInvoke({
      help: { code: 0, stdout: helpStdout, stderr: '' },
      globalFlags: { code: 0, stdout: globalStdout, stderr: '' },
      other: { code: 1, stdout: '', stderr: 'Unknown command' },
    });

    const result = await tool({ command_parts: commandPath() });

    expect(gcloud.invoke).toHaveBeenCalledTimes(2);
    // The help lookup must receive the command path followed by the document flag.
    expect(gcloud.invoke).toHaveBeenCalledWith([
      'compute',
      'instances',
      'list',
      '--document=style=markdown',
    ]);
    // The global flags lookup is a fixed invocation.
    expect(gcloud.invoke).toHaveBeenCalledWith(['help', '--format=markdown(global_flags)']);

    const output = payloadOf(result);
    expect(output.status).toBe('success');
    expect(output.documentation).toContain(helpStdout);
    expect(output.documentation).toContain('flag1');
    expect(output.documentation).toContain('flag10');
    expect(output.documentation).not.toContain('PREAMBLE');
    expect(output.documentation).not.toContain('flag11-should-be-excluded');
    expect(output.instructions_for_agent.next_step).toBe('VERIFY');
  });

  test('returns error when help command fails', async () => {
    const tool = createTool();
    const emptySuccess: InvokeResult = { code: 0, stdout: '', stderr: '' };

    routeInvoke({
      help: { code: 1, stdout: '', stderr: 'Help command failed' },
      globalFlags: emptySuccess,
      other: emptySuccess,
    });

    const result = await tool({ command_parts: commandPath() });

    // The tool must bail out before asking for global flags.
    expect(gcloud.invoke).toHaveBeenCalledTimes(1);
    expect(result.isError).toBe(true);
    const output = payloadOf(result);
    expect(output.status).toBe('failure');
    expect(output.reason).toBe('invalid command or group');
    expect(output.instructions_for_agent.next_step).toBe('RESEARCH');
    expect(output.error_details).toContain('Help command failed');
  });

  test('returns success with warning when global flags command fails', async () => {
    const tool = createTool();
    const helpStdout = '# Help Output';

    routeInvoke({
      help: { code: 0, stdout: helpStdout, stderr: '' },
      globalFlags: { code: 1, stdout: '', stderr: 'Global flags failed' },
      other: { code: 0, stdout: '', stderr: '' },
    });

    const result = await tool({ command_parts: commandPath() });

    expect(gcloud.invoke).toHaveBeenCalledTimes(2);
    expect(result.isError).toBeUndefined();
    const output = payloadOf(result);
    expect(output.status).toBe('success');
    // A failed global flags lookup degrades the result but does not fail it.
    expect(output.documentation).toContain(helpStdout);
  });

  test('returns error when unexpected exception occurs', async () => {
    const tool = createTool();

    vi.mocked(gcloud.invoke).mockRejectedValue(new Error('Unexpected error'));

    const result = await tool({ command_parts: commandPath() });

    expect(result.isError).toBe(true);
    const output = payloadOf(result);
    expect(output.status).toBe('failure');
    expect(output.reason).toBe('execution error');
    expect(output.error).toBe('Unexpected error');
  });
});
163 changes: 163 additions & 0 deletions packages/gcloud-mcp/src/tools/research_gcloud_command.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import * as gcloud from '../gcloud.js';
import { z } from 'zod';
import { log } from '../utility/logger.js';

/** Heading that precedes the global flag list in the `gcloud help` markdown output. */
const GLOBAL_FLAGS_HEADING = 'GLOBAL FLAGS';

/**
 * Number of lines kept after the heading. Mirrors the shell pipeline
 * `grep -A10 "GLOBAL FLAGS" | tail -n +2 | head -n 10`.
 */
const GLOBAL_FLAGS_MAX_LINES = 10;

// Matches ANSI CSI escape sequences (ESC `[` parameters, intermediates, final byte).
// eslint-disable-next-line no-control-regex
const ANSI_CSI_PATTERN = /\u001b\[[0-?]*[ -/]*[@-~]/g;

/** Removes ANSI CSI escape sequences so the text handed to the agent is plain markdown. */
const stripAnsi = (text: string): string => text.replace(ANSI_CSI_PATTERN, '');

/** Serializes a payload as 2-space indented JSON, the format used for every tool response. */
const toPrettyJson = (payload: unknown): string => JSON.stringify(payload, null, 2);

/**
 * Returns the lines that follow the first line containing the global flags heading,
 * capped at GLOBAL_FLAGS_MAX_LINES, or an empty string when the heading is absent.
 *
 * NOTE(review): a fixed line window can cut a flag description in the middle and drops
 * every flag past the window. The window is kept here to preserve existing behavior;
 * revisit whether the excerpt should be untruncated or removed altogether.
 */
const extractGlobalFlags = (helpOutput: string): string => {
  const lines = helpOutput.split('\n');
  const headingIndex = lines.findIndex((line) => line.includes(GLOBAL_FLAGS_HEADING));
  if (headingIndex === -1) {
    return '';
  }
  const firstFlagLine = headingIndex + 1;
  return lines.slice(firstFlagLine, firstFlagLine + GLOBAL_FLAGS_MAX_LINES).join('\n');
};

/**
 * Joins the command help with the global flags excerpt. The excerpt section is
 * omitted entirely when there is nothing to show, so no empty heading is emitted.
 */
const buildDocumentation = (helpText: string, globalFlags: string): string =>
  globalFlags.trim() === ''
    ? `\n${helpText}\n`
    : `\n${helpText}\n\n## GLOBAL FLAGS (Partial)\n${globalFlags}\n`;

/**
 * Builds the `research_gcloud_command` MCP tool.
 *
 * The tool invokes `gcloud <command_parts> --document=style=markdown` to fetch the
 * reference text of a command, appends an excerpt of the global flags taken from
 * `gcloud help --format=markdown(global_flags)`, and returns both inside a JSON
 * payload together with guidance for the calling agent.
 *
 * Response payloads (JSON-encoded in the single text item of the result):
 * - help lookup exits non-zero: `status: 'failure'`, `reason: 'invalid command or group'`,
 *   returned as an error result; the global flags lookup is not attempted.
 * - global flags lookup exits non-zero: a warning is logged and the result is still
 *   `status: 'success'`, without the global flags section.
 * - any thrown exception: `status: 'failure'`, `reason: 'execution error'`, returned
 *   as an error result.
 *
 * @returns An object whose `register` method adds the tool to the given MCP server.
 */
export const createResearchGcloudCommand = () => ({
  register: (server: McpServer) => {
    server.registerTool(
      'research_gcloud_command',
      {
        title: 'Research gcloud command',
        // The handler only runs documentation lookups, so advertise it as read-only.
        annotations: {
          readOnlyHint: true,
          destructiveHint: false,
          idempotentHint: true,
          openWorldHint: false,
        },
        inputSchema: {
          command_parts: z
            .array(z.string())
            .describe(
              "The ordered list of command groups and the command itself. Example: for `gcloud compute instances list`, pass `['compute', 'instances', 'list']`. Do not include flags starting with `--`.",
            ),
        },
        description: `Retrieves the official help text and reference documentation for a Google Cloud CLI (gcloud) command.

**CRITICAL INSTRUCTION**: This is a MANDATORY PRECURSOR to the \`run_gcloud_command\` tool. You must use this tool to 'read the manual' before attempting to execute any command.

**Workflow**:
1. **Research**: Call this tool with the target command path (e.g., \`['compute', 'instances', 'list']\`). Do NOT include flags (e.g., \`--project\`, \`--zone\`) in the input arguments.
2. **Verify**: The tool returns the official documentation. You must STOP and analyze this text. Check if your intended flags exist and if your argument syntax is correct.
3. **Execute**: Only after this verification step is complete, proceed to call \`run_gcloud_command\` with the validated arguments.

Use this tool to prevent syntax errors and hallucinated flags.`,
      },
      async ({ command_parts }) => {
        const args = command_parts as string[];
        const toolLogger = log.mcp('research_gcloud_command', args);

        try {
          toolLogger.info('Executing research_gcloud_command');

          // Part 1: gcloud [args] --document=style=markdown
          const helpResult = await gcloud.invoke([...args, '--document=style=markdown']);

          if (helpResult.code !== 0) {
            toolLogger.error(
              `Failed to get help for command '${args.join(' ')}'.\nSTDERR:\n${helpResult.stderr}`,
            );
            return errorTextResult(
              toPrettyJson({
                status: 'failure',
                reason: 'invalid command or group',
                instructions_for_agent: {
                  next_step: 'RESEARCH',
                  guidance: 'STOP making assumptions. Perform a search for the correct command.',
                },
                error_details: stripAnsi(helpResult.stderr),
              }),
            );
          }

          // Part 2: gcloud help --format="markdown(global_flags)", reduced to a short excerpt.
          const globalResult = await gcloud.invoke(['help', '--format=markdown(global_flags)']);

          let globalFlagsOutput = '';
          if (globalResult.code === 0) {
            globalFlagsOutput = extractGlobalFlags(stripAnsi(globalResult.stdout));
          } else {
            // Best effort: the command help alone is still useful to the agent.
            toolLogger.warn(`Failed to get global flags help.\nSTDERR:\n${globalResult.stderr}`);
          }

          return successfulTextResult(
            toPrettyJson({
              status: 'success',
              documentation: buildDocumentation(stripAnsi(helpResult.stdout), globalFlagsOutput),
              instructions_for_agent: {
                next_step: 'VERIFY',
                guidance:
                  "Compare your user's request against the above documentation. Identify any missing required flags. Ensure the command description aligns with the goal. Confirm the syntax for 'zone' and 'project'. Formulate the final command arguments to strictly adhere to this documentation.",
              },
            }),
          );
        } catch (e: unknown) {
          toolLogger.error(
            'research_gcloud_command failed',
            e instanceof Error ? e : new Error(String(e)),
          );
          const msg = e instanceof Error ? e.message : 'An unknown error occurred.';
          return errorTextResult(
            toPrettyJson({
              status: 'failure',
              reason: 'execution error',
              error: msg,
            }),
          );
        }
      },
    );
  },
});

/** Tool response carrying exactly one text item; `isError` is set only on failures. */
type TextResultType = { content: [{ type: 'text'; text: string }]; isError?: boolean };

/** Wraps `text` in a tool result that has no `isError` field. */
const successfulTextResult = (text: string): TextResultType => {
  const result: TextResultType = { content: [{ type: 'text', text }] };
  return result;
};

/** Wraps `text` in a tool result flagged with `isError: true`. */
const errorTextResult = (text: string): TextResultType => {
  return { ...successfulTextResult(text), isError: true };
};