# agentic-search/agentic_search.py

import json # Import the json module
from qwen_agent.agents import Assistant

# LLM settings handed to the agent.
#
# The active configuration targets an OpenAI-compatible endpoint served on
# localhost (`model_server` is the API base URL). Alternatives that are
# intentionally left disabled:
#   * Alibaba Model Studio endpoint:
#         'model_type': 'qwen_dashscope',
#         'api_key': os.getenv('DASHSCOPE_API_KEY'),
#   * Extra generation options:
#         'generate_cfg': {'thought_in_content': True}
#     Add it when the response content looks like
#     `<think>this is the thought</think>this is the answer`; leave it out
#     when the response is already split into reasoning_content and content.
llm_cfg = dict(
    model='qwen3:0.6B',
    model_server='http://localhost:11434/v1',  # api_base
    api_key='EMPTY',
)

# Tools exposed to the agent: one MCP server configuration entry (both servers
# are started through the `uvx` command) followed by a built-in tool name.
tools = [
    dict(
        mcpServers=dict(  # An MCP configuration file can be specified here
            time=dict(
                command='uvx',
                args=['mcp-server-time', '--local-timezone=Asia/Shanghai'],
            ),
            fetch=dict(
                command='uvx',
                args=['mcp-server-fetch'],
            ),
        ),
    ),
    'code_interpreter',  # Built-in tool
]

# Define Agent
# Build the assistant from the LLM settings and the tool list defined above.
bot = Assistant(llm=llm_cfg, function_list=tools)

# Streaming generation
messages = [{'role': 'user', 'content': 'fetch https://qwenlm.github.io/blog/ and give me a summery of the energy efficiency of training and inference of Qwen'}]

# bot.run() is consumed as a stream; only the last yielded value is kept.
# Start from None so an empty stream is detectable after the loop.
final_responses = None
for responses in bot.run(messages=messages, enable_thinking=True, max_tokens=8192):
    final_responses = responses

# Pretty-print the final response object, then the final answer text.
if final_responses:
    # ensure_ascii=False keeps non-ASCII text readable instead of \uXXXX escapes.
    print(json.dumps(final_responses, indent=2, ensure_ascii=False))
    # Qwen-Agent yields a *list* of message dicts, so indexing the response
    # with 'content' directly raises TypeError. Read the last message instead,
    # while still tolerating a single dict.
    if isinstance(final_responses, dict):
        last_message = final_responses
    else:
        last_message = final_responses[-1]
    print(last_message.get('content', ''))
else:
    print("No response received from the agent.")