from mlx_lm import load, generate, stream_generate

# Download (if needed) and load the 8-bit quantized Qwen3 0.6B model
model, tokenizer = load("mlx-community/Qwen3-0.6B-8bit")

# Qwen3's "/no_think" soft switch disables the model's thinking mode
prompt = "/no_think How do I make best use of the Qwen3 0.6B model?"

# Wrap the prompt in the model's chat template when one is available
if tokenizer.chat_template is not None:
    messages = [{"role": "user", "content": prompt}]
    prompt = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True
    )

# Stream tokens to stdout as they are generated
for response in stream_generate(model, tokenizer, prompt, max_tokens=2048):
    print(response.text, end="", flush=True)
print()
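
# A minimal non-streaming sketch using the generate() function imported
# above: it blocks until decoding finishes and returns the completion as
# a single string, reusing the model, tokenizer, and prompt from before.
text = generate(model, tokenizer, prompt, max_tokens=2048)
print(text)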