feat: clustering for notes
parent 8039a18d82
commit 6d4bb0c6cb

exp.ts | 81
@@ -1,3 +1,80 @@
import { initVectorStoreSync } from "./tools/notes-vectors";
import { Client } from "pg";
import skmeans from "skmeans";

initVectorStoreSync();

const config = {
  postgresConnectionOptions: {
    host: "127.0.0.1",
    port: 5432,
    user: "postgres",
    password: "defaultpwd",
    database: "postgres",
  },
  tableName: "anya",
  columns: {
    idColumnName: "id",
    vectorColumnName: "vector",
  },
};

// Fetch embeddings from PostgreSQL with data inspection
async function fetchEmbeddings(): Promise<{ id: string; vector: number[] }[]> {
  const client = new Client(config.postgresConnectionOptions);
  await client.connect();

  const res = await client.query(
    `SELECT ${config.columns.idColumnName} as id, ${config.columns.vectorColumnName} as vector
     FROM ${config.tableName} LIMIT 5`
  );
  await client.end();

  // Inspect the data format of each vector
  return res.rows.map((row, index) => {
    console.log(`Row ${index} - Vector Type:`, typeof row.vector);
    console.log(`Row ${index} - Vector Data:`, row.vector);

    let vector: number[] = [];

    // Determine the correct format based on observed type
    if (Array.isArray(row.vector)) {
      vector = row.vector; // If it's already an array, use as-is
    } else if (typeof row.vector === "string") {
      vector = JSON.parse(row.vector); // If string, parse as JSON
    } else if (Buffer.isBuffer(row.vector)) {
      vector = Array.from(row.vector); // If Buffer, convert to array of numbers
    } else {
      console.error("Unknown vector format:", row.vector);
    }

    return {
      id: row.id,
      vector,
    };
  });
}

// Run clustering on fetched embeddings
async function listClusters() {
  const embeddings = await fetchEmbeddings();
  const vectors = embeddings.map((doc) => doc.vector);

  // Validate the format and contents of the vectors
  vectors.forEach((vector, index) => {
    if (!Array.isArray(vector) || vector.some(isNaN)) {
      console.error(`Invalid vector at index ${index}:`, vector);
    }
  });

  // Run K-means clustering with a specified number of clusters
  const k = 3; // Number of clusters
  const result = skmeans(vectors, k);

  // Log the cluster assignment for each document
  embeddings.forEach((doc, index) => {
    console.log(`Document ID: ${doc.id}, Cluster: ${result.idxs[index]}`);
  });

  console.log("Cluster assignments:", result.idxs);
}

// Execute clustering function
listClusters().catch(console.error);
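For context on what `skmeans(vectors, k)` returns in the experiment above: it yields an `idxs` array parallel to the input plus the computed `centroids`. A minimal sketch of grouping document IDs by their assigned cluster — the helper is hypothetical and not part of this commit:

```ts
import skmeans from "skmeans";

// Hypothetical helper: group document IDs by the cluster index skmeans assigns them.
// `embeddings` is assumed to have the same { id, vector } shape as in exp.ts above.
function groupByCluster(
  embeddings: { id: string; vector: number[] }[],
  k: number
): Map<number, string[]> {
  const result = skmeans(
    embeddings.map((e) => e.vector),
    k
  );
  const groups = new Map<number, string[]>();
  result.idxs.forEach((cluster: number, i: number) => {
    const ids = groups.get(cluster) ?? [];
    ids.push(embeddings[i].id);
    groups.set(cluster, ids);
  });
  return groups;
}
```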
@@ -20,6 +20,7 @@
    "@nextcloud/files": "^3.8.0",
    "@solyarisoftware/voskjs": "^1.2.8",
    "@types/node-cron": "^3.0.11",
    "@types/skmeans": "^0.11.7",
    "@types/turndown": "^5.0.5",
    "@types/xml2js": "^0.4.14",
    "axios": "^1.7.3",
@@ -32,6 +33,7 @@
    "fuse.js": "^7.0.0",
    "fuzzysort": "^3.0.2",
    "i": "^0.3.7",
    "kmeans-ts": "^1.0.4",
    "langchain": "^0.0.212",
    "llamaindex": "^0.8.0",
    "mathjs": "^12.2.1",
@@ -48,6 +50,7 @@
    "quickchart-js": "^3.1.3",
    "resend": "^4.0.0",
    "serpapi": "^2.0.0",
    "skmeans": "^0.11.3",
    "turndown": "^7.2.0",
    "uuid": "^11.0.2",
    "whatsapp-web.js": "^1.26.0",
@@ -331,6 +331,24 @@ async function saveListenersToFile() {
  await fs.writeFile(LISTENERS_FILE_PATH, data, "utf-8");
}

/**
 * Replaces placeholders in the format {{key}} in the template with corresponding values from the provided record.
 * If the value is not a string, it is JSON-stringified before inserting.
 *
 * @param template - The string template containing placeholders like {{key}}.
 * @param data - The record containing key-value pairs for replacement.
 * @returns The formatted string with placeholders replaced by data values.
 */
function replacePlaceholders(
  template: string,
  data: Record<string, any>
): string {
  return template.replace(/{{\s*([^}]+)\s*}}/g, (_, key) => {
    const value = data[key.trim()];
    return typeof value === "string" ? value : JSON.stringify(value);
  });
}

// Function to register a listener with the eventManager
function registerListener(listener: EventListener) {
  const { eventId, description, userId, options, tool_names, notify } =
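A quick usage sketch of `replacePlaceholders` (the template and data below are hypothetical): string values are inserted as-is, everything else goes through `JSON.stringify`.

```ts
// Hypothetical example, not part of the commit.
const text = replacePlaceholders(
  "Event {{eventId}} fired with payload {{payload}}",
  {
    eventId: "new_todo_for_anya",
    payload: { title: "Buy milk" },
  }
);
// => 'Event new_todo_for_anya fired with payload {"title":"Buy milk"}'
```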
@@ -398,10 +416,12 @@ function registerListener(listener: EventListener) {
  const is_voice = listener.eventId === "on_voice_message";
  const is_new_todo_note = listener.eventId === "new_todo_for_anya";
  const is_message_from_a_manager =
    listener.eventId.startsWith("message_from");

  let attached_image: string | undefined = undefined;

  if (is_voice || is_new_todo_note) {
  if (is_voice || is_new_todo_note || is_message_from_a_manager) {
    tools = getTools(
      contextMessage.author.username,
      contextMessage
@@ -449,7 +469,7 @@ function registerListener(listener: EventListener) {
    console.log("Running ASK for event listener: ", listener.description);

    const system_prompts =
      is_voice || is_new_todo_note
      is_voice || is_new_todo_note || is_message_from_a_manager
        ? await buildSystemPrompts(contextMessage)
        : undefined;
@@ -494,14 +514,13 @@ function registerListener(listener: EventListener) {
- **Will Auto Notify Creator of Listener:** ${notify ? "Yes" : "No"}
- **Instruction:** ${listener.instruction}

**Important Note:**

- If the above event and payload does **not** match the instruction, reply with the string **"IGNORE"** to skip executing the instruction for this payload.

**Action Required:**

- Follow the instruction provided in the payload.
- Return the notification text based on the instruction.

**Important Note:**
- If the above event and payload does **not** match the instruction, reply with the string **"IGNORE"** to skip executing the instruction for this payload.

`;

    const voice_prompt = `You are in voice trigger mode.
@@ -534,24 +553,37 @@ function registerListener(listener: EventListener) {
Whatever you reply with will be sent to the user as a notification automatically. Do not use communication_manager to notify the same user.
`;

    const message_from_manager_prompt = `You just got a request from a manager.

The manager has sent you a message which triggered this event.

- Event ID: ${eventId}
- Payload: ${JSON.stringify(payload)}
`;

    if (system_prompts) {
      prompt = `${system_prompts.map((p) => p.content).join("\n\n")}`;
    }

    const response = !(is_voice || is_new_todo_note)
      ? await ask({
          model: "gpt-4o-mini",
          prompt,
          tools,
        })
      : await ask({
    let promptToUse = prompt;
    let seed = `${listener.id}-${eventId}`;

    if (is_voice) {
      promptToUse = voice_prompt;
      seed = `voice-anya-${listener.id}-${eventId}`;
    } else if (is_new_todo_note) {
      promptToUse = new_todo_note_prompt;
      seed = `todos-from-user-${listener.id}-${eventId}`;
    } else if (is_message_from_a_manager) {
      promptToUse = message_from_manager_prompt;
      seed = `message-from-manager-${listener.id}-${eventId}`;
    }

    const response = await ask({
      model: attached_image ? "gpt-4o" : "gpt-4o-mini",
      prompt,
      message: is_voice ? voice_prompt : new_todo_note_prompt,
      prompt: promptToUse,
      image_url: attached_image ?? undefined,
      seed: `${is_voice ? "voice-anya" : "todos-from-user"}-${
        listener.id
      }-${eventId}`,
      seed,
      tools,
    });
@@ -560,7 +592,7 @@ function registerListener(listener: EventListener) {
    const ignore = content?.includes("IGNORE");

    if (ignore) {
      console.log("Ignoring event: ", content);
      console.log("Ignoring event: ", content, payload);
      return;
    }
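The net effect of the listener changes above: any listener whose `eventId` starts with `message_from` now gets the full system prompts, the new `message_from_manager_prompt`, and its own seed. Such events are emitted elsewhere in this commit, for example by the Cody manager; a simplified sketch of that emit (the payload values here are illustrative, not from the source):

```ts
// Simplified from the emit in software-engineer.ts later in this commit; field values are made up.
eventManager.emit("message_from_cody", {
  users_request: "download this video for me",
  users_request_timestamp: new Date().toTimeString(),
  codys_response: "Saved it to /anya/media/video.mp4",
  execution_time: "8421ms",
});
```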
@@ -60,6 +60,10 @@ import { communication_manager_tool } from "./communication";
import { send_sys_log } from "../interfaces/log";
import { init_anya_todos_watcher, init_notes_watcher } from "./notes-executer";
import { initVectorStoreSync } from "./notes-vectors";
import {
  dockerToolManager,
  DockerToolManagerSchema,
} from "./software-engineer";

// get time function
const GetTimeParams = z.object({});
@@ -460,6 +464,24 @@ Try to fix any errors that are returned at least once before sending to the user
      if user wants to create some automation based on some event.`,
    }),
  },
  {
    name: "softwareEngineerManagerTool",
    tool: zodFunction({
      function: (args) => dockerToolManager(args, context_message),
      name: "software_engineer_manager",
      schema: DockerToolManagerSchema,
      description: `Software Engineer Manager Tool.
His name is Cody. He is a software engineer, and someone who loves technology.
He specializes in Linux and DevOps.

This tool can do anything related to what a tech person would do.
They can scrape websites to search for something, summarize YouTube videos from just a link, download full videos and more.
This manager is like a whole other user that you are talking to.

When talking to this manager, you can inform the user that you asked Cody for this query etc.
`,
    }),
  },
  {
    name: "restart",
    tool: zodFunction({
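For illustration, the `softwareEngineerManagerTool` registered above ultimately calls `dockerToolManager` (defined later in this commit); a hypothetical direct call, bypassing the LLM tool-call layer:

```ts
// Hypothetical invocation; `context_message` is assumed to be a Message already in scope.
const reply = await dockerToolManager(
  { message: "Install ffmpeg and tell me the version", wait_for_reply: true },
  context_message
);
console.log(reply.response);
```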
@@ -0,0 +1,23 @@
import { z } from "zod";
import { zodFunction } from ".";
import { eventManager } from "../interfaces/events";

const MessageAnyaSchema = z.object({
  message: z.string(),
});
export type MessageAnyaSchema = z.infer<typeof MessageAnyaSchema>;

async function message_anya({ message }: MessageAnyaSchema, event_id: string) {
  const res = await eventManager.emitWithResponse(event_id, {
    message,
  });
  return JSON.stringify(res);
}
export const message_anya_tool = (event_id: string) =>
  zodFunction({
    function: async (args: MessageAnyaSchema) =>
      await message_anya(args, event_id),
    name: "message_anya",
    schema: MessageAnyaSchema,
    description: "Send a message to Anya.",
  });
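The factory above binds the tool to a single event id, so each manager gets its own channel back to Anya. The notes manager wires it up like this later in this diff; the commented line sketches what the call does under the hood:

```ts
// Same call that appears in the webdav_tools list further down in this diff.
const tool = message_anya_tool("message_from_notes_manager");

// Under the hood, invoking the tool emits the event and waits for whoever answers it:
// await eventManager.emitWithResponse("message_from_notes_manager", { message: "..." });
```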
@@ -83,12 +83,11 @@ async function handleNoteInstruction(
      request: `The following is a note that the user left a message for you in.
The file path is: ${filePath}
The user's instruction for you is in the file content and starts with '!!' followed by the message or an attached audio message that you can transcribe to get the actual instructions.
file content:
---
${fileContent}
---

Make sure to remove the user's instruction line (line that starts with '!!') and the respective audio message if there is one after you have read it and done the necessary action.
Note: Make sure to remove the user's instruction line (line that starts with '!!') and the respective audio message if there is one after you have read it and done the necessary action.

file content:
${fileContent}
`,
    },
    context_message
@@ -6,6 +6,7 @@ import {
import { OpenAIEmbeddings } from "@langchain/openai";
import { v4 as uuidv4 } from "uuid";
import * as crypto from "crypto";
import skmeans from "skmeans";

let isSyncing = false;
let isCleanupRunning = false;
@@ -76,12 +77,15 @@ const config = {
    vectorColumnName: "vector",
    contentColumnName: "content",
    metadataColumnName: "metadata",
    clusterColumnName: "cluster",
  },
  distanceStrategy: "cosine" as DistanceStrategy,
};

const vectorStore = await PGVectorStore.initialize(embeddings, config);

const CLUSTER_COUNT = 4;

// Main function to sync vector store
export async function syncVectorStore() {
  if (isSyncing) {
@@ -94,6 +98,8 @@ export async function syncVectorStore() {
    console.log("Starting vector store sync...");
    const files = await getAllFiles("notes");

    let filesIndexed = 0;

    for (const file of files) {
      const content = `filename: ${file.filename}\n${file.content}`;
      // Calculate checksum
@@ -129,10 +135,10 @@ export async function syncVectorStore() {
      await vectorStore.addDocuments([document], {
        ids: [document.metadata.id],
      });

      filesIndexed++;
      console.log(`Indexed ${file.filename}`);
    }

    filesIndexed > 0 && (await runClustering());
    console.log("Vector store sync completed.");
  } catch (error) {
    console.error("Error during vector store sync:", error);
@@ -161,16 +167,19 @@ export async function cleanupDeletedFiles() {
      const dbFiles = queryResult.rows;
      const files = await getAllFiles("notes");
      const existingFilenames = files.map((file) => file.filename);
      let deletedFiles = 0;

      for (const dbFile of dbFiles) {
        if (!existingFilenames.includes(dbFile.filename)) {
          // Delete the file from the vector store if it no longer exists in notes
          await vectorStore.delete({ ids: [dbFile.id] });
          deletedFiles++;
          console.log(
            `Deleted ${dbFile.filename} from vector store as it no longer exists.`
          );
        }
      }
      deletedFiles > 0 && (await runClustering());
    }

    console.log("Cleanup of deleted files completed.");
@@ -181,12 +190,82 @@ export async function cleanupDeletedFiles() {
  }
}

// Ensure the cluster column exists in the table
async function ensureClusterColumn() {
  await vectorStore.client?.query(
    `ALTER TABLE ${config.tableName} ADD COLUMN IF NOT EXISTS ${config.columns.clusterColumnName} INT;`
  );
  console.log("Ensured cluster column exists in the database.");
}

// Function to generate clusters from stored embeddings and save them to the database
async function generateClusters(k: number) {
  // Ensure the cluster column exists before proceeding
  await ensureClusterColumn();

  const queryResult = await vectorStore.client?.query(
    `SELECT ${config.columns.idColumnName} as id, ${config.columns.vectorColumnName} as vector
     FROM ${config.tableName}`
  );

  if (!queryResult) {
    console.log("No embeddings found in the vector store.");
    return;
  }

  // Process embeddings and format data
  const embeddings = queryResult.rows.map((row) => {
    let vector: number[] = [];

    // Check vector data format and convert to number array if needed
    if (Array.isArray(row.vector)) {
      vector = row.vector;
    } else if (typeof row.vector === "string") {
      vector = JSON.parse(row.vector);
    } else if (Buffer.isBuffer(row.vector)) {
      vector = Array.from(row.vector);
    } else {
      console.error("Unknown vector format:", row.vector);
    }

    return {
      id: row.id,
      vector,
    };
  });

  // Extract vectors for clustering
  const vectors = embeddings.map((doc) => doc.vector);

  // Run clustering algorithm (K-means)
  const result = skmeans(vectors, k);

  // Save each document's cluster label in the database
  for (const [index, doc] of embeddings.entries()) {
    const cluster = result.idxs[index];
    await vectorStore.client?.query(
      `UPDATE ${config.tableName} SET ${config.columns.clusterColumnName} = $1 WHERE ${config.columns.idColumnName} = $2`,
      [cluster, doc.id]
    );
    console.log(`Document ID: ${doc.id} assigned to Cluster: ${cluster}`);
  }

  console.log("Cluster assignments saved to database.");
}

// Exported function to run clustering
export async function runClustering() {
  const k = CLUSTER_COUNT;
  console.log("Generating clusters...");
  await generateClusters(k);
}

export async function initVectorStoreSync() {
  console.log("Starting vector store sync...");
  await syncVectorStore();
  setInterval(syncVectorStore, 1000 * 60 * 2); // Every 2 minutes
  await cleanupDeletedFiles();
  setInterval(cleanupDeletedFiles, 1000 * 60 * 60 * 12); // Every 12 hours
  setInterval(cleanupDeletedFiles, 1000 * 60 * 60 * 2); // Every 2 hours
}

export function semantic_search_notes(query: string, limit: number) {
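Once `runClustering` has filled the cluster column, related notes can be pulled back with a plain SQL filter. A minimal sketch, assuming the same `vectorStore` and `config` as above — this helper is hypothetical and not part of the commit:

```ts
// Hypothetical helper: fetch the ids of all notes assigned to one cluster label.
async function getNotesInCluster(cluster: number): Promise<string[]> {
  const res = await vectorStore.client?.query(
    `SELECT ${config.columns.idColumnName} as id
     FROM ${config.tableName}
     WHERE ${config.columns.clusterColumnName} = $1`,
    [cluster]
  );
  return res?.rows.map((row: { id: string }) => row.id) ?? [];
}
```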
@@ -10,6 +10,7 @@ import { semantic_search_notes, syncVectorStore } from "./notes-vectors";
import { readFileSync, writeFileSync } from "fs";
import { join } from "path";
import { tmpdir } from "os";
import { message_anya_tool } from "./message-anya";

// Initialize WebDAV client
const client = createClient("http://192.168.29.85/remote.php/dav/files/raj/", {
@@ -425,6 +426,9 @@ Ensure the vault remains organized, filenames and paths are correct, and relaven
You can try creating canvas files that use the open json canvas format

- **Today's Date:** ${new Date().toDateString()}
- **Current Time:** ${new Date().toLocaleTimeString()}

You also have access to the message_anya tool, which can ask an AI called Anya for help with scheduling notifications, reminders, or even calendar events for the user; you can also fetch details about the same by asking her.

- **ALL Vault's File structure for context:**
---
@@ -443,11 +447,26 @@
${potentially_relavent_files_paths.join("\n")}
    : ""
}

- **Recently Modified Files:**
---
${(await getRecentFiles({})).message}
---

- **User Notes/Instructions for you:**
---
${notesManagerPromptFiles.map((f) => f.content).join("\n")}
---

- **Current User's Home page (quick-note.md):**
---
${
  (
    await fetchFileContents({
      path: "quick-note.md",
    })
  ).message
}

Note: When the user is trying to create/add a note, check the templates directory for any relevant templates. If one is available, fetch it and create the note based on the template.
`,
    message: request,
@@ -501,6 +520,36 @@ export async function transcribeAudioFile({
  }
}

export async function getRecentFiles({}): Promise<OperationResult> {
  const limit = 5;
  try {
    const files = await client.getDirectoryContents("notes", {
      details: true,
      deep: true,
    });

    const fileList = Array.isArray(files) ? files : files.data;
    const sortedFiles = fileList
      .filter((file) => file.type === "file")
      .sort((a, b) => {
        const aTime = new Date(a.lastmod).getTime();
        const bTime = new Date(b.lastmod).getTime();
        return bTime - aTime;
      });

    const latestFiles = sortedFiles
      .slice(0, limit)
      .map((file) => file.filename);

    return {
      success: true,
      message: latestFiles.length > 0 ? latestFiles : "No files found",
    };
  } catch (error: any) {
    return { success: false, message: error.message };
  }
}

// Integration into runnable tools
export let webdav_tools: RunnableToolFunction<any>[] = [
  zodFunction({
@@ -552,6 +601,7 @@
    schema: MoveItemParams,
    description: "Move a note file or directory.",
  }),
  message_anya_tool("message_from_notes_manager"),
  zodFunction({
    function: semanticSearchNotes,
    name: "semanticSearchNotes",
@@ -0,0 +1,279 @@
import { z } from "zod";
import { $ } from "zx";
import { zodFunction } from "./";
import { Message } from "../interfaces/message";
import { ask } from "./ask";
import { memory_manager_init, memory_manager_guide } from "./memory-manager";
import { ChatCompletion } from "openai/resources/index.mjs";
import { eventManager } from "../interfaces/events";

// Schema for Docker Tool Manager input
export const DockerToolManagerSchema = z.object({
  message: z.string(),
  wait_for_reply: z
    .boolean()
    .optional()
    .describe(
      "Wait for a reply from Cody. If false or not defined, Cody will do the task in the background."
    ),
});

export type DockerToolManager = z.infer<typeof DockerToolManagerSchema>;

// Schema for running commands on the Docker container
export const RunCommandParams = z.object({
  commands: z
    .array(z.string())
    .describe("An array of commands to run in the Docker container"),
  wait: z
    .boolean()
    .optional()
    .describe(
      "Wait for the command to finish before proceeding. Defaults to true."
    ),
  stdout: z
    .boolean()
    .optional()
    .describe(
      "Whether to return the output of the command. Defaults to true. You can set this to false when you do not need the output, for example when updating env vars or installing packages."
    ),
  stderr: z
    .boolean()
    .optional()
    .describe("Whether to return the error output of the command. Defaults to true."),
});
export type RunCommandParams = z.infer<typeof RunCommandParams>;

const containerName = "anya-manager-container";
export async function run_command({
  commands,
  wait = true,
  stderr = true,
  stdout = true,
}: RunCommandParams): Promise<{
  stdout?: string;
  error?: string;
  failedCommand?: string;
}> {
  // Step 1: Check if the container exists and is running
  try {
    const isRunning =
      await $`docker inspect -f '{{.State.Running}}' ${containerName}`;
    if (isRunning.stdout.trim() !== "true") {
      console.log(`Container ${containerName} is not running. Starting it...`);
      await $`docker start ${containerName}`;
    }
  } catch (checkError) {
    console.log(`Container ${containerName} does not exist. Creating it...`);
    try {
      // Create a new always-running python:3.10 (Debian-based) container with /anya mounted
      await $`docker run -d --name anya-manager-container --restart always -v /anya:/anya python:3.10 /bin/bash -c "while true; do sleep 3600; done"`;
    } catch (createError: any) {
      console.error(
        `Error creating container ${containerName}: ${
          createError.stderr || createError.message
        }`
      );
      return { error: createError.stderr || createError.message };
    }
  }

  if (!wait) {
    // Return early if not waiting for command to finish
    return { stdout: "Command execution started in the background." };
  }

  // Step 2: Execute commands sequentially
  let combinedStdout = "";
  for (let i = 0; i < commands.length; i++) {
    const command = commands[i];
    console.log(
      `Executing Docker command: docker exec ${containerName} /bin/bash -c "${command}"`
    );

    try {
      const processOutput =
        await $`docker exec ${containerName} /bin/bash -c ${command}`;
      console.log(`Command executed successfully: ${command}`);
      if (stdout) {
        combinedStdout += processOutput.stdout;
      }
    } catch (runError: any) {
      console.error(
        `Error during command execution at command index ${i}: ${
          runError.stderr || runError.message
        }`
      );
      if (stderr) {
        return {
          error: runError.stderr || runError.message,
          failedCommand: command,
        };
      }
    }
  }

  // All commands executed successfully
  console.log("All commands executed successfully.");
  return { stdout: combinedStdout || "All commands executed successfully." };
}

// Tool definition for running commands in the Docker container
export const run_command_tool = {
  name: "runCommand",
  tool: zodFunction({
    function: async (args: RunCommandParams) => await run_command(args),
    name: "run_command",
    schema: RunCommandParams,
    description:
      "Run commands in the manager's Docker container with a description of their purpose.",
  }),
};
// Main Docker Tool Manager function
export async function dockerToolManager(
  { message, wait_for_reply }: DockerToolManager,
  context_message: Message
): Promise<{ response: string }> {
  console.log("Docker Tool Manager invoked with message:", message);
  const toolsPrompt = `# You are Cody.

You are a software engineer, and someone who loves technology.

You specialize in Linux and DevOps, and you are a Python expert.

You exist inside a docker container named '${containerName}'.

The current time is: ${new Date().toLocaleString()}.

## Responsibilities:
1. You have access to a docker container running the python:3.10 image (based on Debian) that you can run commands on.
2. You can install software, update configurations, or run scripts in the environment.
3. You can personalise the environment to your liking.
4. Help the user when they ask you for something to be done.

### Container details:
- The container is always running.
- The container has a volume mounted at /anya which persists data across container restarts.
- /anya is the only directory accessible to the user.

## The /anya/readme.md file

1. You can use the file at /anya/readme.md to keep track of all the changes you make to the environment.

2. These changes can include installing new software, updating configurations, or running scripts.

3. This file can also contain any account credentials or API keys that you saved, with some description so that you know what they are for.

4. It is important that you keep /anya/readme.md updated so as not to repeat yourself; /anya/readme.md acts as your memory.

The current data from /anya/readme.md is:
\`\`\`
${await $`cat /anya/readme.md`}
\`\`\`

You can also use the /anya/memories/ directory to store even more specific information in case the /anya/readme.md file gets too big.

Current /anya/memories/ directory contents (tree /anya/memories/ command output):
\`\`\`
${await $`tree /anya/memories/`}
\`\`\`

You can also save scripts in the /anya/scripts/ directory and run them when needed.

Current /anya/scripts/ directory contents (ls /anya/scripts/ command output):
\`\`\`
${await $`ls /anya/scripts/`}
\`\`\`
This directory can contain Python scripts or scripts in any other language, based on your preference.

When you create a script in the /anya/scripts/ directory you should also create a similarly named file prefixed with instruction_ that explains how to run the script.

This will help you run older scripts.

You can also keep all your Python dependencies in a virtual env inside the /anya/scripts/venv/ directory.

You can also use the /anya/media/ dir to store media files. You can arrange them in subfolders you create as needed.

Current /anya/media/ directory contents (ls /anya/media/ command output):
\`\`\`
${await $`ls /anya/media/`}
\`\`\`


Example flow:
User: plz let me download this youtube video https://youtube.com/video
What you need to do:
1. Look at the /anya/scripts/ data to see if there is a script to download YouTube videos.
2. If there is no script, create a new script to download YouTube videos that takes the YouTube URL and the output file path as params, save it in the /anya/scripts/ directory, and also create an instruction_download_youtube_video.md file.
3. Look at the instruction_download_youtube_video.md file to see how to run that script.
4. Run the script with relevant params.
5. Update the /anya/readme.md file with the changes you had to make to the environment, like installing dependencies or creating new scripts.
6. Reply with the file path of the YouTube video, and anything else you want.

You can also leave notes for yourself in the same file for future reference of changes you make to your environment.
`;

  // Load tools for memory manager and Docker command execution
  const tools = [run_command_tool.tool];

  let response: ChatCompletion;

  if (!wait_for_reply) {
    const timestamp = new Date().toTimeString();
    setTimeout(async () => {
      const startTime = Date.now();
      try {
        response = await ask({
          model: "gpt-4o",
          prompt: `${toolsPrompt}`,
          tools: tools,
          message: message,
        });
      } catch (error: any) {
        console.error(`Error during ask function: ${error.message}`);
        return { response: `An error occurred: ${error.message}` };
      }
      const endTime = Date.now();
      const executionTime = endTime - startTime;
      console.log(`Execution time: ${executionTime}ms`);

      eventManager.emit("message_from_cody", {
        users_request: message,
        users_request_timestamp: timestamp,
        codys_response: response.choices[0].message.content || "NULL",
        execution_time: `${executionTime}ms`,
      });
    }, 0);

    return {
      response:
        "Cody will take care of your request in the background and ping you later through an event.",
    };
  }

  try {
    response = await ask({
      model: "gpt-4o",
      prompt: toolsPrompt,
      tools: tools,
      message: message,
    });
  } catch (error: any) {
    console.error(`Error during ask function: ${error.message}`);
    return { response: `An error occurred: ${error.message}` };
  }

  console.log("Docker Tool Manager response:", response);
  return { response: response.choices[0].message.content || "NULL" };
}

// Tool definition for the Docker Tool Manager
export const docker_tool_manager_tool = (context_message: Message) =>
  zodFunction({
    function: async (args: DockerToolManager) =>
      await dockerToolManager(args, context_message),
    name: "docker_tool_manager",
    schema: DockerToolManagerSchema,
    description: `Docker Tool Manager: Manages a Docker container for command execution, utilizing memory for tracking and retrieving past executions.`,
  });
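A short usage sketch of `run_command` from the new software-engineer module (the commands shown are hypothetical):

```ts
// Runs two commands sequentially inside the persistent container and collects stdout.
const result = await run_command({
  commands: ["python --version", "ls /anya"],
  wait: true,
  stdout: true,
  stderr: true,
});

if (result.error) {
  console.error(`Failed at "${result.failedCommand}": ${result.error}`);
} else {
  console.log(result.stdout);
}
```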