feat: Add CLI support for model, server, and prompt configuration

Willem van den Ende (aider) 2025-05-04 09:24:38 +01:00
parent da48c38d51
commit 49f785cf8d
2 changed files with 96 additions and 62 deletions

README.md

@@ -56,9 +56,19 @@ Even working with tools, when used with `agentic_search.py`, worked up to a point
2. **Run the agent script:**
```bash
# Run with a direct prompt
python agentic_search.py --model "qwen3:32b" prompt "Your prompt here"

# Run with a prompt from stdin
echo "Your prompt" | python agentic_search.py prompt -

# Run with a custom server and API key
python agentic_search.py \
    --model "hf.co/unsloth/Qwen3-30B-A3B-GGUF:Q5_K_M" \
    --server "https://api.example.com/v1" \
    --api-key "your-key" \
    prompt "Your prompt"
```
This runs the agent with the given prompt, shows a spinner while it thinks, and finally prints the full structured response object and the extracted content.
## Dependencies

agentic_search.py

@@ -1,73 +1,97 @@
import json
import sys
import argparse
from typing import Optional
from rich.console import Console
from rich.spinner import Spinner
from qwen_agent.agents import Assistant
from transformers import pipeline

def setup_argparse():
    parser = argparse.ArgumentParser(description='Qwen3 Agent CLI')
    parser.add_argument('--model', default='qwen3:32b',
                        help='Model identifier (default: qwen3:32b)')
    parser.add_argument('--server', default='http://localhost:11434/v1',
                        help='Model server URL (default: http://localhost:11434/v1)')
    parser.add_argument('--api-key', default='EMPTY',
                        help='API key for the model server (default: EMPTY)')
    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # Prompt command
    prompt_parser = subparsers.add_parser('prompt', help='Run agent with a prompt')
    prompt_parser.add_argument('text', nargs='?', default='-',
                               help='Prompt text or "-" for stdin (default: -)')
    return parser

def read_prompt(text: str) -> str:
    """Read prompt from argument or stdin if text is '-'"""
    if text == '-':
        return sys.stdin.read().strip()
    return text

def run_agent(model: str, server: str, api_key: str, prompt: str) -> None:
    """Run the agent with the given configuration and prompt"""
    llm_cfg = {
        'model': model,
        'model_server': server,  # Custom endpoint compatible with the OpenAI API
        'api_key': api_key,
    }
    # Define Tools
    tools = [
        {'mcpServers': {  # You can specify the MCP configuration file
            'time': {
                'command': 'uvx',
                'args': ['mcp-server-time', '--local-timezone=Europe/London']
            },
            "fetch": {
                "command": "uvx",
                "args": ["mcp-server-fetch"]
            },
            "ddg-search": {
                "command": "npx",
                "args": ["-y", "duckduckgo-mcp-server"]
            },
        }},
        'code_interpreter',  # Built-in tools
    ]
    # Define Agent
    bot = Assistant(llm=llm_cfg, function_list=tools)
    console = Console()
    # Streaming generation
    messages = [{'role': 'user', 'content': prompt}]
    final_responses = None
    try:
        with console.status("[bold blue]Thinking...", spinner="dots") as status:
            for responses in bot.run(messages=messages, enable_thinking=True, max_tokens=30000):
                final_responses = responses.pop()
    except Exception as e:
        console.print(f"[bold red]An error occurred during agent execution:[/] {e}")

    # Pretty-print the final response object
    if final_responses:
        console.print("\n[bold green]--- Full Response Object ---[/]")
        console.print(json.dumps(final_responses, indent=2))
        console.print("\n[bold green]--- Extracted Content ---[/]")
        console.print(final_responses.get('content', 'No content found in response.'))
    else:
        console.print("[bold red]No final response received from the agent.[/]")

def main():
    parser = setup_argparse()
    args = parser.parse_args()
    if args.command == 'prompt':
        prompt_text = read_prompt(args.text)
        run_agent(args.model, args.server, args.api_key, prompt_text)
    else:
        parser.print_help()


if __name__ == '__main__':
    main()
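
For reviewers who want to poke at the new argument handling without hitting a model server, here is a minimal sketch of how the parser resolves defaults and the `prompt` subcommand. It assumes the script above is saved as `agentic_search.py` and is importable; the model name `qwen3:8b` is just a placeholder, not part of the commit.

```python
# Hypothetical sanity check for the CLI surface added in this commit.
from agentic_search import setup_argparse, read_prompt

parser = setup_argparse()

# With no arguments, no subcommand is selected and main() would print help.
args = parser.parse_args([])
assert args.command is None
assert args.model == 'qwen3:32b'
assert args.server == 'http://localhost:11434/v1'

# Global flags must come before the subcommand, as in the README examples.
args = parser.parse_args(['--model', 'qwen3:8b', 'prompt', 'Hello'])
assert args.command == 'prompt'
assert read_prompt(args.text) == 'Hello'
```

Putting `--model`, `--server`, and `--api-key` on the top-level parser means they apply uniformly to any future subcommand, at the cost of having to write them before `prompt` on the command line.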