feat: Add CLI support for model, server, and prompt configuration
This commit is contained in:
parent
da48c38d51
commit
49f785cf8d
14
README.md
14
README.md
@ -56,9 +56,19 @@ Event working with tools, when used with `agentic_search.py` worked, up to a poi
|
||||
|
||||
2. **Run the agent script:**
|
||||
```bash
|
||||
python agentic_search.py
|
||||
# Run with direct prompt
|
||||
python agentic_search.py --model "qwen3:32b" prompt "Your prompt here"
|
||||
|
||||
# Run with prompt from stdin
|
||||
echo "Your prompt" | python agentic_search.py prompt -
|
||||
|
||||
# Run with custom server and API key
|
||||
python agentic_search.py \
|
||||
--model "hf.co/unsloth/Qwen3-30B-A3B-GGUF:Q5_K_M" \
|
||||
--server "https://api.example.com/v1" \
|
||||
--api-key "your-key" \
|
||||
prompt "Your prompt"
|
||||
```
|
||||
This will send your prompt to the agent, show a `Thinking...` spinner while the agent runs, and finally output the full structured response and the extracted content.
|
||||
|
||||
## Dependencies
|
||||
|
||||
|
@ -1,73 +1,97 @@
|
||||
import json
|
||||
import sys
|
||||
import argparse
|
||||
from typing import Optional
|
||||
from rich.console import Console
|
||||
from rich.spinner import Spinner
|
||||
from qwen_agent.agents import Assistant
|
||||
from transformers import pipeline
|
||||
|
||||
# Define LLM
|
||||
llm_cfg = {
|
||||
# 'model': 'hf.co/unsloth/Qwen3-30B-A3B-GGUF:Q5_K_M',
|
||||
'model': 'qwen3:32b',
|
||||
def setup_argparse():
    """Build the command-line parser for the agent script.

    Three global options select the model identifier, the model server URL
    and the API key. A single ``prompt`` sub-command accepts an optional
    positional ``text``; when omitted it defaults to ``-``.

    Returns:
        argparse.ArgumentParser: the configured parser. The name of the
        chosen sub-command is stored under ``command`` in the parsed
        namespace.
    """
    cli = argparse.ArgumentParser(description='Qwen3 Agent CLI')

    # (flag, default value, help text) for every global option.
    global_options = (
        ('--model', 'qwen3:32b',
         'Model identifier (default: qwen3:32b)'),
        ('--server', 'http://localhost:11434/v1',
         'Model server URL (default: http://localhost:11434/v1)'),
        ('--api-key', 'EMPTY',
         'API key for the model server (default: EMPTY)'),
    )
    for flag, fallback, description in global_options:
        cli.add_argument(flag, default=fallback, help=description)

    commands = cli.add_subparsers(dest='command', help='Available commands')

    # "prompt" sub-command: run the agent on one prompt.
    prompt_cmd = commands.add_parser('prompt', help='Run agent with a prompt')
    prompt_cmd.add_argument(
        'text',
        nargs='?',
        default='-',
        help='Prompt text or "-" for stdin (default: -)',
    )

    return cli
|
||||
|
||||
# Use a custom endpoint compatible with OpenAI API:
|
||||
'model_server': 'http://localhost:11434/v1', # api_base
|
||||
'api_key': 'EMPTY',
|
||||
def read_prompt(text: str) -> str:
    """Resolve the prompt text for the agent.

    Args:
        text: The prompt itself, or the literal ``-`` to request that the
            prompt be read from standard input.

    Returns:
        ``text`` unchanged when it is anything other than ``-``; otherwise
        the full contents of stdin with surrounding whitespace stripped.
    """
    if text != '-':
        return text

    piped = sys.stdin.read()
    return piped.strip()
|
||||
|
||||
# Other parameters:
|
||||
# 'generate_cfg': {
|
||||
# # Add: When the response content is `<think>this is the thought</think>this is the answer;
|
||||
# # Do not add: When the response has been separated by reasoning_content and content.
|
||||
# 'thought_in_content': True,
|
||||
# },
|
||||
}
|
||||
|
||||
# Define Tools
|
||||
tools = [
|
||||
{'mcpServers': { # You can specify the MCP configuration file
|
||||
'time': {
|
||||
'command': 'uvx',
|
||||
'args': ['mcp-server-time', '--local-timezone=Europe/London']
|
||||
},
|
||||
"fetch": {
|
||||
"command": "uvx",
|
||||
"args": ["mcp-server-fetch"]
|
||||
},
|
||||
"ddg-search": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "duckduckgo-mcp-server"]
|
||||
},
|
||||
def run_agent(model: str, server: str, api_key: str, prompt: str) -> None:
|
||||
"""Run the agent with the given configuration and prompt"""
|
||||
llm_cfg = {
|
||||
'model': model,
|
||||
'model_server': server,
|
||||
'api_key': api_key,
|
||||
}
|
||||
},
|
||||
'code_interpreter', # Built-in tools
|
||||
]
|
||||
|
||||
# Define Agent
|
||||
bot = Assistant(llm=llm_cfg, function_list=tools)
|
||||
console = Console()
|
||||
# Define Tools
|
||||
tools = [
|
||||
{'mcpServers': { # You can specify the MCP configuration file
|
||||
'time': {
|
||||
'command': 'uvx',
|
||||
'args': ['mcp-server-time', '--local-timezone=Europe/London']
|
||||
},
|
||||
"fetch": {
|
||||
"command": "uvx",
|
||||
"args": ["mcp-server-fetch"]
|
||||
},
|
||||
"ddg-search": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "duckduckgo-mcp-server"]
|
||||
},
|
||||
}
|
||||
},
|
||||
'code_interpreter', # Built-in tools
|
||||
]
|
||||
|
||||
# Streaming generation
|
||||
messages = [{'role': 'user',
|
||||
'content':
|
||||
""""
|
||||
***Research** Updating models from https://huggingface.co .
|
||||
**Analyze** how can I find out if a model on hugging face is newer than the model I have now. For instance https://huggingface.co/unsloth/Qwen3-30B-A3B-GGUF
|
||||
pipe = pipeline("text-generation", model="Qwen/Qwen3-30B-A3B")
|
||||
**Answer** In English. Is there a versioning scheme for models on huggingface? Can I instruct ollama to pull a newer version of a model?"""}]
|
||||
# Define Agent
|
||||
bot = Assistant(llm=llm_cfg, function_list=tools)
|
||||
console = Console()
|
||||
|
||||
final_responses = None
|
||||
# Consider adding error handling around bot.run
|
||||
try:
|
||||
with console.status("[bold blue]Thinking...", spinner="dots") as status:
|
||||
for responses in bot.run(messages=messages, enable_thinking=True, max_tokens=30000):
|
||||
final_responses = responses.pop()
|
||||
except Exception as e:
|
||||
console.print(f"[bold red]An error occurred during agent execution:[/] {e}")
|
||||
# Streaming generation
|
||||
messages = [{'role': 'user', 'content': prompt}]
|
||||
|
||||
# Pretty-print the final response object
|
||||
if final_responses:
|
||||
console.print("\n[bold green]--- Full Response Object ---[/]")
|
||||
console.print(json.dumps(final_responses, indent=2))
|
||||
console.print("\n[bold green]--- Extracted Content ---[/]")
|
||||
console.print(final_responses.get('content', 'No content found in response.'))
|
||||
else:
|
||||
console.print("[bold red]No final response received from the agent.[/]")
|
||||
final_responses = None
|
||||
try:
|
||||
with console.status("[bold blue]Thinking...", spinner="dots") as status:
|
||||
for responses in bot.run(messages=messages, enable_thinking=True, max_tokens=30000):
|
||||
final_responses = responses.pop()
|
||||
except Exception as e:
|
||||
console.print(f"[bold red]An error occurred during agent execution:[/] {e}")
|
||||
|
||||
# Pretty-print the final response object
|
||||
if final_responses:
|
||||
console.print("\n[bold green]--- Full Response Object ---[/]")
|
||||
console.print(json.dumps(final_responses, indent=2))
|
||||
console.print("\n[bold green]--- Extracted Content ---[/]")
|
||||
console.print(final_responses.get('content', 'No content found in response.'))
|
||||
else:
|
||||
console.print("[bold red]No final response received from the agent.[/]")
|
||||
|
||||
def main(argv: Optional[list] = None) -> None:
    """Entry point: parse the command line and dispatch the chosen command.

    Args:
        argv: Argument list to parse, without the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so calling ``main()`` with no arguments behaves as a normal
            script invocation.

    Behavior:
        * ``prompt`` command: resolve the prompt text (argument or stdin)
          and run the agent with the selected model, server and API key.
        * no command: print the help text.

    An empty or whitespace-only prompt is rejected through
    ``parser.error`` (usage message on stderr, exit status 2) instead of
    being sent to the model.
    """
    parser = setup_argparse()
    args = parser.parse_args(argv)

    if args.command != 'prompt':
        parser.print_help()
        return

    prompt_text = read_prompt(args.text)
    # An empty prompt (e.g. empty stdin) would still start the agent and
    # call the model with nothing to answer, so fail fast instead.
    if not prompt_text.strip():
        parser.error(
            'prompt is empty; pass the text as an argument or pipe it on stdin'
        )

    run_agent(args.model, args.server, args.api_key, prompt_text)


if __name__ == '__main__':
    main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user