# agentic-search/agentic_search.py
#
# 56 lines
# 1.9 KiB
# Python

import json # Import the json module
from qwen_agent.agents import Assistant
# LLM configuration: qwen3:0.6B served from a custom, OpenAI-API-compatible
# endpoint on localhost.
#
# Alternative: to use the endpoint provided by Alibaba Model Studio, supply
# these entries instead of 'model_server' / the placeholder 'api_key':
#     'model_type': 'qwen_dashscope',
#     'api_key': os.getenv('DASHSCOPE_API_KEY'),
#
# Other optional parameters:
#     'generate_cfg': {
#         # Add: when the response content is
#         #      `<think>this is the thought</think>this is the answer`.
#         # Do not add: when the response has already been separated into
#         #      reasoning_content and content.
#         'thought_in_content': True,
#     },
llm_cfg = dict(
    model='qwen3:0.6B',
    model_server='http://localhost:11434/v1',  # api_base
    api_key='EMPTY',
)
# Tools made available to the agent: one MCP configuration entry (an MCP
# configuration file can be specified here) plus a built-in tool name.
tools = [
    {
        'mcpServers': {
            # Each server entry gives the command and its argument list.
            'time': {
                'command': 'uvx',
                'args': ['mcp-server-time', '--local-timezone=Asia/Shanghai'],
            },
            'fetch': {
                'command': 'uvx',
                'args': ['mcp-server-fetch'],
            },
        },
    },
    'code_interpreter',  # Built-in tool
]
# Define Agent: an Assistant wired to the LLM settings and tool list above.
bot = Assistant(llm=llm_cfg, function_list=tools)

# Streaming generation
messages = [{'role': 'user', 'content': 'fetch https://qwenlm.github.io/blog/ and give me a summery of the energy efficiency of training and inference of Qwen'}]

# bot.run() is consumed as a stream; only the last yielded value is kept.
# Start from None so the "no response" branch is taken if nothing is yielded.
final_responses = None
for responses in bot.run(messages=messages, enable_thinking=True, max_tokens=8192):
    final_responses = responses

if final_responses:
    # Pretty-print the full response; ensure_ascii=False keeps non-ASCII
    # text readable instead of emitting \uXXXX escapes.
    print(json.dumps(final_responses, indent=2, ensure_ascii=False))
    # The streamed response is a list of message dicts, not a single dict,
    # so indexing it with 'content' directly raises TypeError. The answer is
    # the content of the last message in the list.
    print(final_responses[-1]['content'])
else:
    print("No response received from the agent.")