plenum-bot/src/mediawiki.rs
nobody d3681e1699 mediawiki/pandoc: call directly instead of via lib
Pandoc crate is confusing and badly documented.
Using std::process::Command now to avoid temporary files.
2024-12-10 21:38:35 +01:00

389 lines
15 KiB
Rust

use std::cell::OnceCell;
use std::error::Error;
use std::fs::File;
use std::io::{Read, Write};
use std::process::{Command, Output, Stdio};
use colored::Colorize;
use reqwest::blocking::Client;
use serde::Deserialize;
use serde_json::json;
use crate::config_spec::{CfgField, CfgGroup};
/// Configuration group `wiki`: declares the keys (with defaults and descriptions)
/// that hold the Mediawiki connection and authentication settings.
///
/// `CfgField::Default` entries carry a default value; `CfgField::Password`
/// entries only carry a key and a description.
pub const CONFIG: CfgGroup<'static> = CfgGroup {
name: "wiki",
description: "API Settings for Mediawiki",
fields: &[
// Base URL of the wiki server.
CfgField::Default {
key: "server-url",
default: "https://wiki.berlin.ccc.de",
description: "Server running the wiki.",
},
// Credentials for the HTTP basic auth layer in front of the wiki.
CfgField::Default {
key: "http-user",
default: "cccb-wiki",
description: "HTTP basic auth user name.",
},
CfgField::Password {
key: "http-password",
description: "HTTP basic auth password."
},
// Credentials of the bot account used for API edits.
CfgField::Default {
key: "api-user",
default: "PlenumBot@PlenumBot-PW2",
description: "API Username associated with the bot account used for edits.",
},
CfgField::Password {
key: "api-secret",
description: "API secret / \"password\" used for authenticating as the bot.",
},
// Name of the overview page on which new plenum pages get linked.
CfgField::Default {
key: "plenum-page",
default: "Plenum",
description: "The name of the wiki page where all new plenum pages will be linked.",
},
// Free-form ETA message; the default states that the program never ran.
CfgField::Default {
key: "eta",
default: "no ETA, program never ran",
description: "ETA message for estimating time the program takes."
}
],
};
/// Client state for talking to a Mediawiki instance through its `api.php` endpoint.
pub struct Mediawiki {
/// Base URL of the wiki; requests are sent to `{server_url}/api.php?`.
server_url: String,
/// HTTP basic auth user name. NOTE(review): the `basic_auth` calls in
/// `make_request` are currently commented out, so this is only shown in `Debug` output.
http_user: String,
/// HTTP basic auth password (masked in `Debug` output; currently not sent, see `http_user`).
http_password: String,
/// Bot user name, sent as `lgname` by `login`.
api_user: String,
/// Bot secret, sent as `lgpassword` by `login`.
api_secret: String,
/// Dry-run flag passed to `new`. NOTE(review): apart from `Debug` output no code
/// visible in this file reads it — confirm whether edits should be skipped when set.
is_dry_run: bool,
/// Login token; filled by `get_login_token`, read by `login`.
login_token: OnceCell<String>,
/// CSRF token; filled by `get_csrf_token`, read by `new_wiki_page` and `edit_section`.
csrf_token: OnceCell<String>,
/// Title of the main plenum page; fetched by `update_plenum_page` and used as
/// title prefix for new pages in `pad_ins_wiki`.
plenum_main_page_name: String,
/// Blocking HTTP client, built in `new` with the cookie store enabled.
client: Client,
}
/// Manual `Debug` implementation that masks the HTTP password and leaves out
/// the API credentials, the cached tokens and the plenum page name.
impl std::fmt::Debug for Mediawiki {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("Mediawiki");
        out.field("server_url", &self.server_url);
        out.field("http_user", &self.http_user);
        // Never print the real password.
        out.field("http_password", &"*****");
        out.field("is_dry_run", &self.is_dry_run);
        out.field("client", &self.client);
        out.finish()
    }
}
/// HTTP request flavours understood by `Mediawiki::make_request`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidRequestTypes {
    /// `GET` request; the parameters are sent as URL query string.
    Get,
    /// `POST` request; the parameters are sent as form body.
    Post,
    /// `POST` request intended for edits; `make_request` currently treats it
    /// exactly like [`ValidRequestTypes::Post`].
    PostForEditing,
}
impl Mediawiki {
/// Builds a new wiki client from the given connection settings.
///
/// All string arguments are copied into owned `String`s; both token cells
/// start out empty.
///
/// # Panics
/// Panics if the underlying HTTP client cannot be constructed.
pub fn new(
    server_url: &str,
    http_auth_user: &str,
    http_auth_password: &str,
    api_user: &str,
    api_secret: &str,
    is_dry_run: bool,
    plenum_main_page_name: &str,
) -> Self {
    // The cookie store keeps the session cookies between the API calls.
    let client = Client::builder().cookie_store(true).build().unwrap();
    Self {
        server_url: server_url.to_owned(),
        http_user: http_auth_user.to_owned(),
        http_password: http_auth_password.to_owned(),
        api_user: api_user.to_owned(),
        api_secret: api_secret.to_owned(),
        is_dry_run,
        login_token: OnceCell::new(),
        csrf_token: OnceCell::new(),
        plenum_main_page_name: plenum_main_page_name.to_owned(),
        client,
    }
}
/// Requests a login token from the wiki API and stores it in `self.login_token`.
///
/// # Errors
/// Returns an error if the request fails, if the response cannot be
/// deserialized, or if a login token has already been stored for this
/// instance (the token cell can only be set once).
pub fn get_login_token(&self) -> Result<(), Box<dyn Error>> {
    let url = format!("{}/api.php?", self.server_url);
    let params: Box<[(&str, &str)]> = Box::from([
        ("format", "json"),
        ("meta", "tokens"),
        ("type", "login"),
        ("action", "query"),
    ]);
    let resp = self.make_request(url, params, ValidRequestTypes::Get)?;
    let response_deserialized: QueryResponseLogin = serde_json::from_str(&resp)?;
    // `OnceCell::set` hands the rejected value back as its error. Propagating it
    // with a bare `?` would turn the token itself into the error message, so it
    // is replaced by a descriptive message instead.
    self.login_token
        .set(response_deserialized.query.tokens.logintoken)
        .map_err(|_| "Login token has already been fetched for this Mediawiki instance")?;
    Ok(())
}
pub fn login (&self) -> Result<String, Box<dyn Error>> {
let url = format!("{}/api.php?", self.server_url);
let params: Box<[(&str, &str)]> = Box::from([
("lgname", self.api_user.as_str()),
("lgpassword", self.api_secret.as_str()),
("lgtoken", self.login_token.get().unwrap()),
("action", "login")
]);
let resp: Result<String, Box<dyn Error>> = self.make_request(url, params, ValidRequestTypes::Post);
Ok(resp?)
}
/// Requests a CSRF token from the wiki API and stores it in `self.csrf_token`.
///
/// # Errors
/// Returns an error if the request fails, if the response cannot be
/// deserialized, or if a CSRF token has already been stored for this
/// instance (the token cell can only be set once).
pub fn get_csrf_token(&self) -> Result<(), Box<dyn Error>> {
    let url = format!("{}/api.php?", self.server_url);
    let params: Box<[(&str, &str)]> = Box::from([
        ("format", "json"),
        ("meta", "tokens"),
        ("formatversion", "2"),
        ("action", "query"),
    ]);
    let resp: String = self.make_request(url, params, ValidRequestTypes::Get)?;
    let response_deserialized: QueryResponseCsrf = serde_json::from_str(&resp)?;
    // `OnceCell::set` hands the rejected value back as its error. Propagating it
    // with a bare `?` would turn the token itself into the error message, so it
    // is replaced by a descriptive message instead.
    self.csrf_token
        .set(response_deserialized.query.tokens.csrftoken)
        .map_err(|_| "CSRF token has already been fetched for this Mediawiki instance")?;
    Ok(())
}
/// Sends a request to `url` and returns the response body as text.
///
/// `Get` requests carry `params` as query string, both `Post` variants carry
/// them as form body.
///
/// # Errors
/// Returns an error if sending fails (with a dedicated message for connection
/// problems), if the server answers with a non-success HTTP status, or if the
/// response body cannot be read.
pub fn make_request(&self, url: String, params: Box<[(&str, &str)]>, request_type: ValidRequestTypes) -> Result<String, Box<dyn Error>> {
    // NOTE: HTTP basic auth was removed for testing purposes. To re-enable it, add
    // `.basic_auth(&self.http_user, Some(&self.http_password))` to both builders.
    let request = match request_type {
        ValidRequestTypes::Get => self.client.get(url).query(&params),
        ValidRequestTypes::Post | ValidRequestTypes::PostForEditing => {
            self.client.post(url).form(&params)
        }
    };

    let response = match request.send() {
        Ok(response) => response,
        Err(e) if e.is_connect() => {
            return Err(format!("Failed to connect to wiki server. Please check your internet connection or the server URL.\n(Error: {})", e).into());
        }
        Err(e) => {
            return Err(format!("An error occurred while sending the request to the wiki server: {}", e).into());
        }
    };

    let status = response.status();
    if !status.is_success() {
        return Err(format!("Failed to connect to wiki server: HTTP status code {}", status).into());
    }
    // All request types return the body text unchanged.
    Ok(response.text()?)
}
/// Creates a completely new wiki page with page_content and page_title as inputs
pub fn new_wiki_page (&self, page_title: &str, page_content: &str) -> Result<String, Box<dyn Error>> {
// action=edit&format=json&title=Wikipedia:Sandbox&appendtext=Hello&token=sampleCsrfToken123+\
let url =
format!("{}/api.php?", self.server_url);
let params: Box<[(&str, &str)]> = Box::from([
("action", "edit"), // Create and edit pages.
("format", "json"),
("title", page_title), // Title of the page to edit. Cannot be used together with pageid.
("text", page_content), // Add this text to the end of the page or section. Overrides text.
("token", self.csrf_token.get().unwrap()), // A "csrf" token retrieved from action=query&meta=tokens
("bot", "true"), // Mark this edit as a bot edit.
]);
let request_result = self.make_request(url, params, ValidRequestTypes::Post);
self.update_plenum_page(page_title)?;
request_result
}
/// Downloads the main plenum page from the wiki and prepares the text with a
/// link to the new page inserted under the current year's heading.
///
/// NOTE: this function is unfinished. It fetches and prints the page content
/// and builds `updated_section`, but the result is discarded and nothing is
/// written back to the wiki. The year is still hard-coded.
pub fn update_plenum_page (&self, new_page_title_to_link_to: &str) -> Result<(), Box<dyn Error>> {
// 1. Download Plenum page content
let page_content = self.get_page_content(&self.plenum_main_page_name)?;
println!("---\nPage Content: {}\n---", page_content.red());
let current_year = "2024"; // TODO: add date logic
let year_section = format!("=== {} ===\n", current_year);
if page_content.contains(&year_section) {
// Split at the year heading; the last piece is everything after the final heading.
let mut content_split: Vec<&str> = page_content.split(&year_section).collect();
println!("Length: {}", content_split.len());
let rest_of_content = content_split.pop().unwrap_or_default();
// Everything up to and including the heading, followed by the new list entry.
let updated_section = format!("{}{}\n* {}", content_split.join(&year_section), year_section, new_page_title_to_link_to);
// Not yet active: combining both parts into the new page text.
//format!("{}{}", updated_section, rest_of_content)
}
Ok(())
}
pub fn get_page_content (&self, page_title: &str) -> Result<String, Box<dyn Error>> {
let url =
format!("{}/api.php?", self.server_url);
let params: Box<[(&str, &str)]> = Box::from([
("action", "parse"), // Create and edit pages.
("prop", "wikitext"),
("format", "json"),
("page", page_title),
("formatversion", "2"),
]);
let resp = self.make_request(url, params, ValidRequestTypes::Get)?;
let resp = json!(resp);
Ok(resp["parse"]["wikitext"].to_string())
}
/// Intended to return the title of a section of the page `page_title`.
///
/// NOTE: not implemented yet. The function sends the `action=parse` request and
/// then hits `todo!()`, so it panics whenever the request succeeds.
/// `section_number` is not used so far.
pub fn get_page_section_title (&self, page_title: &str, section_number: &str) -> Result<String, Box<dyn Error>> {
let url =
format!("{}/api.php?", self.server_url);
let params: Box<[(&str, &str)]> = Box::from([
("action", "parse"), // Parse content and return parser output.
("contentmodel", "wikitext"),
("format", "json"),
("page", page_title),
]);
let resp = self.make_request(url, params, ValidRequestTypes::Get)?;
todo!()
// Draft of the missing response handling:
//let response_deserialized = serde_json::from_str(&resp)?;
//Ok(response_deserialized["parse"])
}
pub fn edit_section (&self, page_title: &str, text_to_prepend: &str, section_number: &str) -> Result<String, Box<dyn Error>> {
let url =
format!("{}/api.php?", self.server_url);
let params: Box<[(&str, &str)]> = Box::from([
("action", "edit"), // Create and edit pages.
("format", "json"),
("title", page_title), // Title of the page to edit. Cannot be used together with pageid.
("section", section_number), // Section identifier. 0 for the top section, new for a new section. Often a positive integer, but can also be non-numeric
("prependtext", text_to_prepend), // Add this text to the end of the page or section. Overrides text.
("token", self.csrf_token.get().unwrap()), // A "csrf" token retrieved from action=query&meta=tokens
("bot", "true"), // Mark this edit as a bot edit.
]);
let request_result = self.make_request(url, params, ValidRequestTypes::Post);
request_result
}
}
/// Logs in to the wiki, converts the pad from Markdown to Mediawiki markup and
/// creates a new page below the main plenum page with the converted text.
///
/// Progress and intermediate results are written to stderr.
///
/// NOTE: the page title is still a hard-coded test value.
///
/// # Errors
/// Returns the first error reported by any of the wiki calls.
pub fn pad_ins_wiki(old_pad_content: String, wiki: &Mediawiki) -> Result<(), Box<dyn Error>> {
    // Authentication: login token -> login -> CSRF token (needed for writing).
    wiki.get_login_token()?;
    eprintln!("AUTH Done");
    let login_response = wiki.login()?;
    eprintln!("LOGIN Done");
    wiki.get_csrf_token()?;
    eprintln!("CSRF Done");
    eprintln!("---LOGIN RESULT:---\n{:?}\n-----------", login_response);

    // Conversion to Mediawiki markup.
    let wiki_markup = convert_md_to_mediawiki(old_pad_content);
    eprintln!("Das kommt ins Wiki: {}", wiki_markup);

    // New pages live below the main plenum page, e.g. `Plenum/13._August_2024`.
    let full_title = format!("{}/{}", wiki.plenum_main_page_name, "Page Test 5");
    wiki.new_wiki_page(&full_title, &wiki_markup).map(|_| ())
}
/// Converts Markdown text to Mediawiki markup by piping it through the external
/// `pandoc` program (`pandoc --from markdown --to mediawiki --no-highlight`)
/// and returns the converted text. No files are written.
///
/// # Errors
/// Returns an error if `pandoc` cannot be started, if it exits with a
/// non-success status (the message then contains its stderr output), if
/// feeding its stdin fails, or if its output is not valid UTF-8.
fn pandoc_convert(old_pad_content: String) -> Result<String, Box<dyn Error>> {
    let mut child = Command::new("pandoc")
        .args(["--from", "markdown", "--to", "mediawiki", "--no-highlight"])
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()?;

    // Feed stdin from a separate thread while the output pipes are drained below.
    // Writing everything first and reading afterwards can deadlock once one of
    // the pipe buffers runs full. Dropping `stdin` at the end of the closure
    // closes the pipe and signals end-of-input to pandoc.
    let mut stdin = child.stdin.take().ok_or("Failed to open stdin of pandoc")?;
    let writer = std::thread::spawn(move || stdin.write_all(old_pad_content.as_bytes()));

    // Reads stdout and stderr concurrently and waits for pandoc to exit.
    let output = child.wait_with_output()?;
    let write_result = writer
        .join()
        .map_err(|_| "Thread writing to stdin of pandoc panicked")?;

    // Check the exit status first: if pandoc failed, its own message is more
    // useful than a follow-up "broken pipe" from the writer.
    if !output.status.success() {
        let errmsg = String::from_utf8_lossy(&output.stderr);
        return Err(format!("Pandoc error, exit {:?}\n{}", output.status, errmsg).into());
    }
    write_result?;
    Ok(String::from_utf8(output.stdout)?)
}
/// Reads the text file at `filepath` and returns its whole content.
///
/// # Panics
/// Panics if the file cannot be opened or if reading it as UTF-8 text fails.
fn read_txt_file(filepath: &str) -> String {
    let mut text = String::new();
    match File::open(filepath) {
        Ok(mut handle) => {
            handle
                .read_to_string(&mut text)
                .expect("Fehler beim auslesen der MediaWiki-Textdatei!");
        }
        Err(_) => panic!("Fehler beim öffnen der Textdatei mit Pfad {filepath}!"),
    }
    text
}
/// Takes a Sting in the Markdown format and returns a String in the mediawiki Format
fn convert_md_to_mediawiki(old_pad_content: String) -> String {
// TODO: use tempfile="3.3", make it a NamedTempFile::new()?;
// or alternatively use piped stdout to avoid files entirely
let output_filepath: &str = "./pandoc_mediawiki.txt";
pandoc_convert(old_pad_content)
.expect("Fehler beim Umwandeln des und speichern des Pads in eine mediawiki-Textdatei");
let temp = read_txt_file(output_filepath);
println!("TEMP: {}", temp.purple());
temp
}
/*
fn create_title (nächster_plenumstermin: String) {
let date_simple = NaiveDate::from(nächster_plenumstermin);
let wiki_page_title = format!("{} {} {}", date_simple.day(), LongMonthName[date_simple.month()], date_simple.year());
}
*/
// The token responses are deserialized into dedicated structs. According to the
// original author this is necessary because both the login and the CSRF token
// contain two `\\` characters, which break the usual deserialization.

/// Top level of the response to the login token query
/// (`action=query&meta=tokens&type=login`).
#[derive(Deserialize)]
struct QueryResponseLogin {
/// Deserialized as a string for this response; not read anywhere.
#[allow(dead_code)]
batchcomplete: String,
/// The `query` object of the response.
query: QueryTokensLogin,
}
/// The `query` object of the login token response.
#[derive(Deserialize)]
struct QueryTokensLogin {
/// The `tokens` object inside `query`.
tokens: TokensLogin,
}
/// The `tokens` object of the login token response.
#[derive(Deserialize)]
struct TokensLogin {
/// The login token; stored by `get_login_token` and sent as `lgtoken` by `login`.
logintoken: String,
}
/// Top level of the response to the CSRF token query
/// (`action=query&meta=tokens&formatversion=2`).
#[derive(Deserialize)]
struct QueryResponseCsrf {
/// Deserialized as a boolean for this response; not read anywhere.
#[allow(dead_code)]
batchcomplete: bool,
/// The `query` object of the response.
query: crate::mediawiki::QueryTokensCsrf,
}
/// The `query` object of the CSRF token response.
#[derive(Deserialize)]
struct QueryTokensCsrf {
/// The `tokens` object inside `query`.
tokens: crate::mediawiki::TokensCsrf,
}
/// The `tokens` object of the CSRF token response.
#[derive(Deserialize)]
struct TokensCsrf {
/// The CSRF token; stored by `get_csrf_token` and sent as `token` with edit requests.
csrftoken: String,
}