Building a Local Multi-Agent AI System with TinyLlama
This guide delves into creating a streamlined, offline multi-agent AI framework using TinyLlama, leveraging a manager-agent architecture. We demonstrate how to break down complex objectives into manageable subtasks, enable seamless collaboration among specialized AI agents, and implement autonomous reasoning cycles — all without depending on external APIs. By utilizing the transformers library, this approach ensures a lightweight, transparent, and fully customizable system that runs entirely on local hardware.
Core Components: Defining Agents and Tasks
We begin by importing essential libraries and establishing the foundational data models for managing agents and tasks. The Task and Agent classes serve as structured blueprints, enabling clear orchestration and consistent handling of work units throughout the system.
from dataclasses import dataclass, asdict
from typing import List, Any, Dict
from datetime import datetime
@dataclass
class Task:
    """A single unit of work scheduled and tracked by the manager.

    Tasks form a small dependency graph: a task may only run once every id
    listed in ``dependencies`` has completed.
    """
    id: str
    description: str
    assignedto: str = None      # registry key of the agent responsible; None until assigned
    status: str = "pending"     # lifecycle: "pending" -> "inprogress" -> "completed"
    result: Any = None          # text produced by the assigned agent, filled in on completion
    dependencies: List[str] = None  # ids of tasks that must finish before this one

    def __post_init__(self):
        # A mutable default ([]) is illegal for dataclass fields, so None is
        # used as the sentinel and normalized to a fresh list per instance.
        # NOTE: this hook must be named __post_init__ to be invoked by
        # @dataclass; a plain "postinit" method would never run.
        if self.dependencies is None:
            self.dependencies = []
@dataclass
class Agent:
    """Static profile of one specialist agent in the registry.

    The visible workflow reads ``expertise`` when asking the planner LLM to
    assign subtasks, and ``systemprompt`` when executing a task; ``role`` is
    descriptive metadata (not referenced by the code shown here).
    """
    # Registry key; also used as the value of Task.assignedto.
    name: str
    # Human-readable job title.
    role: str
    # One-line capability summary shown to the planner LLM during decomposition.
    expertise: str
    # System prompt prepended to every task prompt this agent executes.
    systemprompt: str
Registering Specialized Agents and Initializing TinyLlama Locally
We define a registry of expert agents, each tailored for specific roles such as research, coding, writing, and data analysis. The LocalLLM class wraps TinyLlama, loading it in an optimized 4-bit quantized mode to ensure efficient performance on local machines or cloud environments like Colab. This setup empowers each agent to generate context-aware responses independently.
# Specification table for the built-in specialists: (name, role, expertise,
# system prompt). Kept as data so the registry below stays declarative.
_AGENT_SPECS = (
    ("researcher", "Research Specialist",
     "Gathering and synthesizing information",
     "You are a research expert. Provide detailed and accurate research."),
    ("developer", "Software Engineer",
     "Crafting clean, efficient code",
     "You are a skilled programmer. Write well-structured and documented code."),
    ("editor", "Content Creator",
     "Clear and engaging communication",
     "You are a professional writer. Produce clear and compelling content."),
    ("dataanalyst", "Data Specialist",
     "Interpreting data and extracting insights",
     "You are a data analyst. Deliver concise and insightful data interpretations."),
)

# Registry of available specialists, keyed by agent name.
AGENTREGISTRY = {
    name: Agent(name=name, role=role, expertise=expertise, systemprompt=prompt)
    for name, role, expertise, prompt in _AGENT_SPECS
}
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
class LocalLLM:
    """Thin wrapper around a locally loaded TinyLlama chat model.

    Loads the model 4-bit quantized when CUDA is available, otherwise in
    full precision, and exposes a single text-in/text-out `generate` method.
    """

    def __init__(self, modelname="TinyLlama/TinyLlama-1.1B-Chat-v1.0"):
        self.tokenizer = AutoTokenizer.from_pretrained(modelname)
        # bitsandbytes 4-bit quantization requires CUDA; on CPU fall back to
        # a plain (unquantized) load by passing quantization_config=None.
        quantconfig = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16
        ) if torch.cuda.is_available() else None
        self.model = AutoModelForCausalLM.from_pretrained(
            modelname,
            quantization_config=quantconfig,
            device_map="auto",
            low_cpu_mem_usage=True
        )
        # TinyLlama's tokenizer ships without a pad token; reuse EOS so that
        # padding during batching/truncation works.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def generate(self, prompt: str, maxtokens: int = 300) -> str:
        """Return the model's completion for *prompt*.

        The prompt is wrapped in the TinyLlama-chat role template; sampling
        uses temperature 0.7 / top-p 0.9, capped at *maxtokens* new tokens.
        """
        formattedprompt = (
            "<|system|>\nYou are a helpful AI assistant.\n</s>\n"
            f"<|user|>\n{prompt}\n</s>\n<|assistant|>\n"
        )
        inputs = self.tokenizer(
            formattedprompt,
            return_tensors="pt",
            truncation=True,
            max_length=1024,
            padding=True
        )
        # Move input tensors to wherever device_map="auto" placed the model.
        inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=maxtokens,
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
                use_cache=True
            )
        decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Prefer splitting on the assistant tag when it survives decoding;
        # otherwise strip the echoed prompt prefix from the output.
        if "<|assistant|>" in decoded:
            return decoded.split("<|assistant|>")[-1].strip()
        return decoded[len(formattedprompt):].strip()
Manager Agent: Orchestrating Task Breakdown and Execution
The ManagerAgent class is responsible for decomposing overarching goals into actionable subtasks, assigning them to the most suitable agents, and managing their execution while respecting task dependencies. This mirrors the workflow of a human project manager, enabling stepwise progress and clear accountability.
import json
import re
class ManagerAgent:
    """Project-manager agent: decomposes a goal into subtasks, assigns them
    to registered specialist agents, and tracks execution state."""

    def __init__(self, modelname="TinyLlama/TinyLlama-1.1B-Chat-v1.0"):
        self.llm = LocalLLM(modelname)
        self.agents = AGENTREGISTRY
        self.tasks: Dict[str, Task] = {}
        self.executionlog = []

    def log(self, message: str):
        """Timestamp *message*, append it to the execution log, and print it."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        entry = f"[{timestamp}] {message}"
        self.executionlog.append(entry)
        print(entry)

    def decomposegoal(self, goal: str) -> List[Task]:
        """Ask the LLM to split *goal* into (at most three) Task objects.

        Falls back to `defaulttasks` templates whenever the model output
        contains no parseable JSON array.
        """
        self.log(f"🎯 Breaking down goal: {goal}")
        agentdescriptions = "\n".join(
            [f"- {name}: {agent.expertise}" for name, agent in self.agents.items()]
        )
        prompt = f"""Divide the following goal into three distinct subtasks, assigning each to the most appropriate agent.
Goal: {goal}
Available agents:
{agentdescriptions}
Respond ONLY with a JSON array."""
        response = self.llm.generate(prompt, maxtokens=250)
        try:
            # Grab the first JSON array embedded in the free-form model reply.
            jsonmatch = re.search(r'\[.*\]', response, re.DOTALL)
            tasksdata = json.loads(jsonmatch.group()) if jsonmatch else self.defaulttasks(goal)
        except Exception:
            # Malformed JSON from a 1.1B model is common; use the templates.
            tasksdata = self.defaulttasks(goal)
        tasks = []
        for i, taskinfo in enumerate(tasksdata[:3]):
            task = Task(
                id=taskinfo.get('id', f'task{i+1}'),
                description=taskinfo.get('description', f'Work on: {goal}'),
                # Round-robin assignment when the model omits an agent.
                assignedto=taskinfo.get('assignedto', list(self.agents.keys())[i % len(self.agents)]),
                # Default to a linear chain: each task depends on its predecessor.
                dependencies=taskinfo.get('dependencies', [] if i == 0 else [f'task{i}'])
            )
            self.tasks[task.id] = task
            tasks.append(task)
            self.log(f" ✔ {task.id}: {task.description[:50]}... → {task.assignedto}")
        return tasks
Fallback Task Generation and Task Execution Workflow
In cases where automatic task decomposition fails, the system falls back to predefined task templates based on the nature of the goal. Each task is executed by its assigned agent, guided by tailored system prompts and enriched with context from dependent tasks to maintain coherence and relevance.
def defaulttasks(self, goal: str) -> List[Dict]:
    """Template three-step plan used when LLM decomposition fails.

    Coding-flavoured goals get a research → develop → document pipeline;
    everything else gets research → analyze → write.
    """
    iscoding = any(
        keyword in goal.lower()
        for keyword in ('code', 'program', 'implement', 'algorithm')
    )
    if iscoding:
        steps = [
            (f"Research and explain the concept: {goal}", "researcher"),
            (f"Develop code implementation for: {goal}", "developer"),
            ("Write documentation and usage examples", "editor"),
        ]
    else:
        steps = [
            (f"Conduct research on: {goal}", "researcher"),
            ("Analyze research findings and organize content", "dataanalyst"),
            ("Compose a detailed and clear response", "editor"),
        ]
    # Materialize the plan as a linear dependency chain: task N waits on N-1.
    plan = []
    for index, (description, agentname) in enumerate(steps, start=1):
        plan.append({
            "id": f"task{index}",
            "description": description,
            "assignedto": agentname,
            "dependencies": [] if index == 1 else [f"task{index - 1}"],
        })
    return plan
def executetask(self, task: Task, context: Dict[str, Any] = None) -> str:
    """Run *task* through its assigned agent and record the result.

    *context* maps completed task ids to their outputs; snippets from the
    task's dependencies are folded into the prompt for continuity.
    """
    self.log(f"🤖 Executing {task.id} assigned to {task.assignedto}")
    task.status = "inprogress"
    agent = self.agents[task.assignedto]
    contextinfo = ""
    if context and task.dependencies:
        contextinfo = "\n\nContext from prior tasks:\n"
        for dep in task.dependencies:
            if dep in context:
                # Trim each dependency's output to 150 chars (newlines
                # flattened) so the prompt stays within the model's budget.
                snippet = context[dep][:150].replace('\n', ' ') + "..."
                contextinfo += f"- {snippet}\n"
    prompt = f"""{agent.systemprompt}
Task: {task.description}{contextinfo}
Please provide a concise and clear response:"""
    result = self.llm.generate(prompt, maxtokens=250)
    task.result = result
    task.status = "completed"
    self.log(f" ✔ Finished {task.id}")
    return result
Aggregating Subtask Outputs into a Unified Final Response
After all subtasks are completed, their outputs are merged into a comprehensive final answer. This synthesis step ensures that the overall goal is addressed cohesively, reflecting the combined expertise of all agents involved.
def synthesizeresults(self, goal: str, results: Dict[str, str]) -> str:
    """Merge per-task outputs into one coherent final answer via the LLM.

    Each task result is capped at 200 characters to keep the synthesis
    prompt small enough for the model's context window.
    """
    self.log("🔄 Combining subtask results into final output")
    combinedtext = "\n\n".join([f"Task {tid}:\n{res[:200]}" for tid, res in results.items()])
    prompt = f"""Integrate the following task results into a single, coherent response.
Original Goal: {goal}
Task Results:
{combinedtext}
Final comprehensive answer:"""
    return self.llm.generate(prompt, maxtokens=350)
Coordinating the Full Multi-Agent Workflow
The manager oversees the entire process, ensuring tasks are executed in the correct order based on dependencies. It iterates through tasks, executing those whose prerequisites are met, until all are completed. The final synthesized output and a detailed execution log are then returned.
def executegoal(self, goal: str) -> Dict[str, Any]:
    """End-to-end workflow: decompose *goal*, execute tasks in dependency
    order, then synthesize a final answer.

    Returns a dict with the goal, the task records, the synthesized final
    output, and the full execution log.
    """
    self.log("\n" + "=" * 60)
    self.log("🎬 Initiating Manager Agent Workflow")
    self.log("=" * 60)
    tasks = self.decomposegoal(goal)
    results = {}
    completedtasks = set()
    # Safety valve: if the dependency graph has a cycle (or references an
    # unknown task id), no task would ever become ready — cap the number of
    # scheduling passes so the loop always terminates.
    maxcycles = len(tasks) * 2
    cycle = 0
    while len(completedtasks) < len(tasks) and cycle < maxcycles:
        cycle += 1
        for task in tasks:
            if task.id in completedtasks:
                continue
            # A task runs only once all of its dependencies have finished.
            if all(dep in completedtasks for dep in task.dependencies):
                output = self.executetask(task, results)
                results[task.id] = output
                completedtasks.add(task.id)
    finalanswer = self.synthesizeresults(goal, results)
    self.log("\n" + "=" * 60)
    self.log("✅ Workflow Complete!")
    self.log("=" * 60)
    return {
        "goal": goal,
        "tasks": [asdict(t) for t in tasks],
        "finaloutput": finalanswer,
        "executionlog": self.executionlog
    }
Interactive Examples to Test the System
To illustrate the system’s capabilities, we provide sample functions that demonstrate how to run the manager with different objectives, from explaining algorithms to coding tasks or custom user-defined goals.
def exampleexplanation():
    """Demo: run the full workflow on an algorithm-explanation goal."""
    manager = ManagerAgent()
    goal = "Describe the quicksort algorithm with a practical example"
    result = manager.executegoal(goal)
    print("\n" + "=" * 60)
    print("FINAL OUTPUT")
    print("=" * 60)
    print(result["finaloutput"])
    return result
def examplecoding():
    """Demo: run the full workflow on a coding goal."""
    manager = ManagerAgent()
    goal = "Create a function to calculate the factorial of a number"
    result = manager.executegoal(goal)
    print("\n" + "=" * 60)
    print("FINAL OUTPUT")
    print("=" * 60)
    print(result["finaloutput"])
    return result
def examplecustomtask(customgoal: str):
    """Demo: run the full workflow on a caller-supplied goal string."""
    manager = ManagerAgent()
    result = manager.executegoal(customgoal)
    print("\n" + "=" * 60)
    print("FINAL OUTPUT")
    print("=" * 60)
    print(result["finaloutput"])
    return result
if __name__ == "__main__":
    # Script entry point: run the explanation demo and suggest follow-ups.
    print("🤖 TinyLlama Manager Agent - Fully Local Multi-Agent System")
    print("=" * 60)
    print("Powered by TinyLlama 1.1B - Efficient and Fast\n")
    exampleexplanation()
    print("\n💡 Try these examples:")
    print(" - examplecoding()")
    print(" - examplecustomtask('your custom goal here')")
Summary
This tutorial showcases how to architect a modular, transparent multi-agent AI system that operates entirely offline. By decomposing complex goals, intelligently assigning tasks to specialized agents, and synthesizing their outputs, we achieve a robust human-AI collaboration pipeline. This approach highlights the potential of local agent orchestration frameworks, offering flexibility, control, and efficiency without external dependencies.
