plenum-bot/src/hedgedoc.rs

207 lines
7.8 KiB
Rust
Raw Normal View History

use crate::config_spec::{CfgField, CfgGroup};
2024-12-20 09:37:52 +01:00
use ollama_rs::generation::completion::request::GenerationRequest;
use regex::Regex;
use reqwest::blocking::Client;
2024-08-02 22:29:22 +02:00
use reqwest::blocking::Response;
use std::error::Error;
2024-12-20 09:37:52 +01:00
use ollama_rs;
use tokio::runtime::Runtime;
pub const CONFIG: CfgGroup<'static> = CfgGroup {
name: "hedgedoc",
description: "HedgeDoc markdown pad server settings",
fields: &[
CfgField::Default {
key: "server-url",
default: "https://md.berlin.ccc.de",
description: "Hedgedoc server storing the pads.",
},
CfgField::Default {
key: "template-name",
default: "plenum-template",
description: "Name of the pad containing the template to use.",
},
CfgField::Generated {
key: "last-id",
generator: make_pad_id,
generator_description: "Makes a new pad that's completely empty.",
description: "ID of last plenum's pad.",
},
CfgField::Generated {
key: "next-id",
generator: make_pad_id,
generator_description: "Makes a new pad that's completely empty.",
description: "ID of next plenum's pad.",
},
2024-12-20 09:37:52 +01:00
CfgField::Default {
key: "ollama-pre-prompt",
default: "You are an expert executive assistant responsible for providing concise summaries of meeting minutes. Your role is to identify and report only the most critical and actionable information. Anything that does not directly inform decisions, require action, or result in significant changes in behavior or plans must be omitted entirely.
Follow these principles:
- Appointments: Include dates, times, and locations of scheduled or rescheduled meetings.
- Action Items: Summarize who is responsible, what is required, and any deadlines.
- Changes: Highlight any shifts in priorities, strategies, or plans that necessitate a change in approach or behavior.
- Decisions: Note decisions made during the meeting, focusing on outcomes or implications.
Do not include other topics, discussions, background information, or contextual details unless they are essential for understanding the critical points. Output a TL;DR of no more than 35 sentences in German.
",
description: "pre-prompt for ollama.",
},
CfgField::Default {
key: "ollama-address",
default: "http://localhost",
description: "address to the machine where ollama should be used.",
},
CfgField::Default {
key: "ollama-port",
default: "11434",
description: "port to the machine where ollama should be used.",
},
CfgField::Default {
key: "ollama-summaries-enabled",
default: "False",
description: "determines whether ollama summaries should be used. can be either 'True' or 'False'.",
},
],
};
2024-08-17 21:52:54 +02:00
#[derive(Debug)]
pub struct HedgeDoc {
2024-08-02 22:29:22 +02:00
server_url: String,
is_dry_run: bool,
client: Client,
}
impl HedgeDoc {
2024-08-02 22:29:22 +02:00
pub fn new(server_url: &str, is_dry_run: bool) -> Self {
Self { server_url: server_url.to_string(), is_dry_run, client: Client::new() }
}
2024-08-02 22:29:22 +02:00
pub fn format_url(&self, pad_name: &str) -> String {
format!("{}/{}", self.server_url, pad_name)
}
2024-08-02 22:29:22 +02:00
fn format_action(&self, pad_name: &str, verb: &str) -> String {
format!("{}/{}/{}", self.server_url, pad_name, verb)
}
2024-08-02 22:29:22 +02:00
fn do_request(&self, url: &str) -> Result<Response, Box<dyn Error>> {
match self.client.get(url).send() {
Ok(response) => {
if response.status().is_success() {
Ok(response)
} else {
Err(format!(
"Failed to connect to hedgedoc server: HTTP status code {}",
response.status()
)
.into())
}
},
Err(e) => {
if e.is_connect() {
Err("Failed to connect to hedgedoc server. Please check your internet connection or the server URL.".into())
} else {
Err(format!(
"An error occurred while sending the request to the hedgedoc server: {}",
e
)
.into())
}
},
}
}
2024-08-02 22:29:22 +02:00
fn get_id_from_response(&self, res: Response) -> String {
res.url().to_string().trim_start_matches(&format!("{}/", self.server_url)).to_string()
}
2024-08-02 22:29:22 +02:00
pub fn download(&self, pad_name: &str) -> Result<String, Box<dyn Error>> {
Ok(self.do_request(&self.format_action(pad_name, "download"))?.text()?)
}
2024-08-02 22:29:22 +02:00
pub fn create_pad(&self) -> Result<String, Box<dyn Error>> {
if self.is_dry_run {
todo!("NYI: sane dry-run behavior")
}
2024-08-02 22:29:22 +02:00
let res = self.do_request(&format!("{}/new", self.server_url)).unwrap();
if res.status().is_success() {
Ok(self.get_id_from_response(res))
} else {
2024-08-02 22:29:22 +02:00
Err(format!("Failed to create pad {}", res.status()).into())
}
}
2024-08-02 22:29:22 +02:00
pub fn import_note(&self, id: Option<&str>, content: String) -> Result<String, Box<dyn Error>> {
if self.is_dry_run {
todo!("NYI: sane dry-run behavior")
}
let url = match id {
2024-08-02 22:29:22 +02:00
Some(id) => self.format_url(&format!("new/{id}")),
None => self.format_url("new"),
};
2024-08-02 22:29:22 +02:00
let res =
self.client.post(&url).header("Content-Type", "text/markdown").body(content).send()?;
if res.status().is_success() {
Ok(self.get_id_from_response(res))
} else {
2024-08-02 22:29:22 +02:00
Err(format!("Failed to import note: {}", res.status()).into())
}
}
}
2024-08-24 01:24:41 +02:00
/// Collects the contents of all `---`-fenced blocks in `pad_content`.
///
/// A block starts at `---` and ends at the next `...` or `---`; surrounding
/// whitespace inside the fence is dropped. The block bodies are returned in
/// order of appearance, joined by newlines.
pub fn extract_metadata(pad_content: String) -> String {
    let fence = Regex::new(r"(?s)---\s*(.*?)\s*(?:\.\.\.|---)").unwrap();
    let mut blocks: Vec<String> = Vec::new();
    for found in fence.captures_iter(&pad_content) {
        blocks.push(found[1].to_string());
    }
    blocks.join("\n")
}
/// Removes `---`-fenced metadata blocks and HTML comments from `pad_content`.
///
/// Metadata blocks are removed first, then `<!-- ... -->` comments (which may
/// span lines); finally leading and trailing whitespace is trimmed.
pub fn strip_metadata(pad_content: String) -> String {
    let re_yaml = Regex::new(r"(?s)---\s*.*?\s*(?:\.\.\.|---)").unwrap();
    let re_comment = Regex::new(r"(?s)<!--.*?-->").unwrap();
    // `replace_all` returns a `Cow`, so keep borrowing and only allocate once
    // for the final result.
    let without_yaml = re_yaml.replace_all(&pad_content, "");
    let without_comments = re_comment.replace_all(&without_yaml, "");
    without_comments.trim().to_string()
}
pub fn summarize(pad_content: String) -> String {
// 1. remove HTML comments
2024-08-24 01:24:41 +02:00
let pad_content = strip_metadata(pad_content);
// 2. accumulate topic lines
let re_header = Regex::new(r"^\s*##(#*) TOP ([\d.]+\s*.*?)\s*#*$").unwrap();
let mut result: Vec<String> = Vec::new();
for line in pad_content.lines() {
if let Some(captures) = re_header.captures(line) {
let indent = " ".repeat(captures.get(1).unwrap().as_str().len());
let title = captures.get(2).unwrap().as_str();
result.push(format!("{}{}", indent, title));
}
2024-12-20 09:37:52 +01:00
};
result.join("\n")
}
2024-12-20 09:37:52 +01:00
/// Asks an ollama server to summarize `pad_content`.
///
/// The prompt sent to the model is `ollama_pre_prompt` immediately followed by
/// `pad_content` (no separator is inserted). The request is executed
/// synchronously on a freshly created tokio runtime.
///
/// # Errors
///
/// Returns an error if the runtime cannot be created or if the generation
/// request fails.
///
/// # Panics
///
/// NOTE(review): `Runtime::block_on` is documented to panic when called from
/// within an asynchronous execution context — confirm callers are synchronous.
pub fn summarize_with_ollama(pad_content: &str, ollama_pre_prompt: &str, ollama_address: &str, ollama_port: &u16) -> Result<String, Box<dyn Error>> {
    // `u16` is `Copy`; dereference instead of cloning through the reference.
    let ollama = ollama_rs::Ollama::new(ollama_address, *ollama_port);
    let model = "qwen2.5:32b".to_string();
    let prompt = format!("{ollama_pre_prompt}{pad_content}");
    // Propagate runtime-creation failures instead of panicking.
    let rt = Runtime::new()?;
    let res = rt.block_on(async {
        ollama.generate(GenerationRequest::new(model, prompt)).await
    })?;
    Ok(res.response)
}
/// Config generator: obtains a fresh pad ID by actually creating a pad.
///
/// Reads `hedgedoc-server-url` from `config` and asks that server for a new
/// pad. `_key` is unused.
///
/// # Panics
///
/// Panics if `hedgedoc-server-url` cannot be read from `config`.
fn make_pad_id(
    _key: &str, config: &crate::key_value::KeyValueStore, is_dry_run: bool,
) -> Result<String, Box<dyn Error>> {
    let server_url = config.get("hedgedoc-server-url").unwrap();
    let hedgedoc = HedgeDoc::new(&server_url, is_dry_run);
    hedgedoc.create_pad()
}