From 5aa6eb81791139f7bb608a9850d7dd3244d753bf Mon Sep 17 00:00:00 2001 From: murmeldin Date: Fri, 20 Dec 2024 09:37:52 +0100 Subject: [PATCH] ollama integration + better messages --- Cargo.lock | 39 +++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 ++ src/hedgedoc.rs | 51 ++++++++++++++++++++++++++++++++++++++++++++++++- src/main.rs | 44 +++++++++++++++++++++++++++++++----------- 4 files changed, 124 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0feffcd..a8cbba1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,6 +114,28 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "async-trait" version = "0.1.83" @@ -1279,6 +1301,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "ollama-rs" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46483ac9e1f9e93da045b5875837ca3c9cf014fd6ab89b4d9736580ddefc4759" +dependencies = [ + "async-stream", + "async-trait", + "log", + "reqwest", + "serde", + "serde_json", + "url", +] + [[package]] name = "once_cell" version = "1.20.2" @@ -1450,6 +1487,7 @@ dependencies = [ "log", "mediawiki", "nom", + "ollama-rs", "rand 0.9.0-beta.1", "regex", "reqwest", @@ -1458,6 +1496,7 @@ dependencies = [ "serde", "serde_json", "stdext", + "tokio", "uuid", ] diff --git a/Cargo.toml b/Cargo.toml index 1321446..7283a37 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,8 @@ serde_json = "1.0.122" colored = "2.1.0" nom = "7.1.3" mediawiki = "0.3.1" +ollama-rs = "0.2.1" +tokio = "1.0.0" [[bin]] name = "Plenum-Bot" diff --git a/src/hedgedoc.rs b/src/hedgedoc.rs index 66be2df..658406f 100644 --- a/src/hedgedoc.rs +++ b/src/hedgedoc.rs @@ -1,8 +1,11 @@ use crate::config_spec::{CfgField, CfgGroup}; +use ollama_rs::generation::completion::request::GenerationRequest; use regex::Regex; use reqwest::blocking::Client; use reqwest::blocking::Response; use std::error::Error; +use ollama_rs; +use tokio::runtime::Runtime; pub const CONFIG: CfgGroup<'static> = CfgGroup { name: "hedgedoc", @@ -30,6 +33,37 @@ pub const CONFIG: CfgGroup<'static> = CfgGroup { generator_description: "Makes a new pad that's completely empty.", description: "ID of next plenum's pad.", }, + CfgField::Default { + key: "ollama-pre-prompt", + default: "You are an expert executive assistant responsible for providing concise summaries of meeting minutes. Your role is to identify and report only the most critical and actionable information. Anything that does not directly inform decisions, require action, or result in significant changes in behavior or plans must be omitted entirely. + +Follow these principles: + +- Appointments: Include dates, times, and locations of scheduled or rescheduled meetings. +- Action Items: Summarize who is responsible, what is required, and any deadlines. +- Changes: Highlight any shifts in priorities, strategies, or plans that necessitate a change in approach or behavior. +- Decisions: Note decisions made during the meeting, focusing on outcomes or implications. + +Do not include other topics, discussions, background information, or contextual details unless they are essential for understanding the critical points. Output a TL;DR of no more than 3–5 sentences in German. + +", + description: "pre-prompt for ollama.", + }, + CfgField::Default { + key: "ollama-address", + default: "http://localhost", + description: "address to the machine where ollama should be used.", + }, + CfgField::Default { + key: "ollama-port", + default: "11434", + description: "port to the machine where ollama should be used.", + }, + CfgField::Default { + key: "ollama-summaries-enabled", + default: "False", + description: "determines whether ollama summaries should be used. can be either 'True' or 'False'.", + }, ], }; @@ -145,10 +179,25 @@ pub fn summarize(pad_content: String) -> String { let title = captures.get(2).unwrap().as_str(); result.push(format!("{}{}", indent, title)); } - } + }; result.join("\n") } +pub fn summarize_with_ollama(pad_content: &str, ollama_pre_prompt: &str, ollama_address: &str, ollama_port: &u16) -> Result> { + let ollama = ollama_rs::Ollama::new(ollama_address, ollama_port.clone()); + let model = "qwen2.5:32b".to_string(); + let prompt = ollama_pre_prompt.to_string() + pad_content; + let rt = Runtime::new().unwrap(); + + let result = rt.block_on(async { + ollama.generate(GenerationRequest::new(model, prompt)).await + }); + match result { + Ok(res) => {return Ok(res.response)}, + Err(err) => {return Err(err.into())} + } +} + /// For the config, make a new pad ID (by actually making a pad.) fn make_pad_id( _key: &str, config: &crate::key_value::KeyValueStore, is_dry_run: bool, diff --git a/src/main.rs b/src/main.rs index 52e4cf7..e42c291 100644 --- a/src/main.rs +++ b/src/main.rs @@ -419,7 +419,7 @@ fn do_reminder( NYI!("trace/verbose annotations"); // fetch current pad contents & summarize let (current_pad_id, _pad_content, toc, n_topics) = get_pad_info(config, hedgedoc); - let old_toc = config.get("state-toc").unwrap_or_default(); + let old_toc = config.get("state-toc")?; // construct email let human_date = plenum_day.format("%d.%m.%Y"); let subject = if n_topics == 0 { @@ -473,9 +473,31 @@ fn do_protocol( NYI!("trace/verbose annotations"); let (current_pad_id, pad_content_without_cleanup, toc, n_topics) = get_pad_info(config, hedgedoc); + let pad_content = hedgedoc::strip_metadata(pad_content_without_cleanup.clone()); + let ollama_enabled: bool = match &config["hedgedoc-ollama-summaries-enabled"] { + "True" => true, + "False" => false, + _ => { + eprintln!("Achtung, ollama_enabled ist nicht definiert, bitte die Konfiguration überprüfen! Es wird False genutzt!"); + false + }, + }; + let summary_or_toc: String = if ollama_enabled && !toc.is_empty() { + let ollama_port: &u16 = &config["hedgedoc-ollama-port"].parse::().expect("The ollama port wasn't given a valid u16 port, please check the config"); + match hedgedoc::summarize_with_ollama(&pad_content, &config["hedgedoc-ollama-pre-prompt"], &config["hedgedoc-ollama-address"], ollama_port) { + Ok(ollama_summary) => ollama_summary, + Err(err) => { + eprintln!("Ollama failed, continuing with standard toc. This was the error Message: {err}"); + toc.clone() + } + } + } else { + verboseln!("Ollama is disabled, just using toc"); + toc.clone() + }; if !toc.is_empty() { + verboseln!("There were TOPs on this Plenum"); let human_date = plenum_day.format("%d.%m.%Y"); - let pad_content = hedgedoc::strip_metadata(pad_content_without_cleanup.clone()); let subject = format!("Protokoll vom Plenum am {human_date}"); let pad_content = pad_content.replace("[toc]", &toc); let body = format!( @@ -491,8 +513,8 @@ fn do_protocol( mediawiki::pad_ins_wiki(pad_content, wiki, plenum_day)?; config.set("state-name", &ProgramState::Logged.to_string()).ok(); } else { + verboseln!("There were no TOPs on this Plenum"); let human_date = plenum_day.format("%d.%m.%Y"); - let pad_content = hedgedoc::strip_metadata(pad_content_without_cleanup.clone()); let subject = format!("Protokoll vom ausgefallenem Plenum am {human_date}"); let pad_content = pad_content.replace("[toc]", &toc); let body = format!( @@ -512,37 +534,37 @@ fn do_protocol( &config["matrix-homeserver-url"], &config["matrix-user-id"], &config["matrix-access-token"], - "!YduwXBXwKifXYApwKF:catgirl.cloud", //&config["room-id-for-short-messages"], - "!YduwXBXwKifXYApwKF:catgirl.cloud", //&config["room-id-for-long-messages"], + &config["matrix-room-id-for-short-messages"], + &config["matrix-room-id-for-long-messages"], is_dry_run(), ); // Send the matrix room message let human_date = plenum_day.format("%d.%m.%Y"); - let pad_content = hedgedoc::strip_metadata(pad_content_without_cleanup); let pad_content = pad_content.replace("[toc]", &toc); let long_message = format!( "Anbei das Protokoll vom {human_date}, ab sofort auch im Wiki zu finden.\n\n\ - Das Pad für das nächste Plenum ist zu finden unter {}/{}.\nDie Protokolle der letzten Plena findet ihr im wiki unter {}/index.php?title={}.\n\n", + Das Pad für das nächste Plenum ist zu finden unter {}/{}.\nDie Protokolle der letzten Plena findet ihr im wiki unter {}/index.php?title={}.\n**Hier die Zusammenfassung:**\n{}", &config["hedgedoc-server-url"], &config["hedgedoc-next-id"], &config["wiki-server-url"], &config["wiki-plenum-page"], + &summary_or_toc ); let full_long_message = format!( - "{}\n\n{}\n\n{}", + "{}\n{}{}", &config["text-email-greeting"], long_message, &config["text-email-signature"] ); let short_message = format!( "Das letzte Plenum hatte Anbei das Protokoll vom {human_date}, ab sofort auch im Wiki zu finden.\n\n\ - Das Pad für das nächste Plenum ist zu finden unter {}/{}.\nDie Protokolle der letzten Plena findet ihr im wiki unter {}/index.php?title={}.\n\n", + Das Pad für das nächste Plenum ist zu finden unter {}/{}.\nDie Protokolle der letzten Plena findet ihr im wiki unter {}/index.php?title={}.", &config["hedgedoc-server-url"], &config["hedgedoc-next-id"], &config["wiki-server-url"], &config["wiki-plenum-page"] ); let full_short_message = format!( - "{}\n\n{}\n\n{}", - &config["text-email-greeting"], short_message, &config["text-email-signature"] + "{}\n{}{}", + &config["text-email-greeting"], short_message, &config["text-email-signature"].strip_prefix("[").unwrap_or(&config["text-email-signature"]).strip_suffix("]").unwrap_or(&config["text-email-signature"]) ); matrix.send_short_and_long_messages_to_two_rooms(&full_short_message, &full_long_message)?; Ok(())