CP437 conversion functions

This commit is contained in:
Vinzenz Schroeter 2024-10-13 13:42:20 +02:00
parent ce946c2fb8
commit e1ca802268
2 changed files with 65 additions and 14 deletions

View file

@ -46,7 +46,7 @@ impl Cp437Grid {
for (index, char) in value.chars().enumerate() {
if !char.is_ascii() {
return Err(InvalidChar { index, char });
return Err(Cp437LoadError::InvalidChar { index, char });
}
let is_lf = char == '\n';
@ -95,21 +95,15 @@ mod feature_cp437 {
/// An array of 256 elements, mapping most of the CP437 values to UTF-8 characters
///
/// Mostly follows CP437, except for:
/// * 0x0A & 0x0D are kept for use as line endings.
/// * 0x1A is used for SAUCE.
/// * 0x1B is used for ANSI escape sequences.
///
/// These exclusions should be fine since most programs can't even use them
/// without issues. And this makes rendering simpler too.
/// Mostly follows CP437, except 0x0A, which is kept for use as line ending.
///
/// See <https://en.wikipedia.org/wiki/Code_page_437#Character_set>
///
/// Copied from https://github.com/kip93/cp437-tools. License: GPL-3.0
/// Mostly copied from <https://github.com/kip93/cp437-tools>. License: GPL-3.0
#[rustfmt::skip]
const CP437_TO_UTF8: [char; 256] = [
/* 0X */ '\0', '☺', '☻', '♥', '♦', '♣', '♠', '•', '◘', '○', '\n', '♂', '♀', '\r', '♫', '☼',
/* 1X */ '►', '◄', '↕', '‼', '¶', '§', '▬', '↨', '↑', '↓', '', '', '∟', '↔', '▲', '▼',
pub const CP437_TO_UTF8: [char; 256] = [
/* 0X */ '\0', '☺', '☻', '♥', '♦', '♣', '♠', '•', '◘', '○', '\n', '♂', '♀', '', '♫', '☼',
/* 1X */ '►', '◄', '↕', '‼', '¶', '§', '▬', '↨', '↑', '↓', '→', '←', '∟', '↔', '▲', '▼',
/* 2X */ ' ', '!', '"', '#', '$', '%', '&', '\'','(', ')', '*', '+', ',', '-', '.', '/',
/* 3X */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
/* 4X */ '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
@ -187,6 +181,28 @@ mod feature_cp437 {
grid
}
}
/// Convert the provided bytes to UTF-8.
pub fn cp437_to_str(cp437: &[u8]) -> String {
cp437.iter().map(move |char| cp437_to_char(*char)).collect()
}
/// Convert a single CP-437 byte to its UTF-8 character.
///
/// The byte is used as an index into the 256-entry `CP437_TO_UTF8` table, so
/// every possible `u8` value has a mapping.
pub fn cp437_to_char(cp437: u8) -> char {
    // Widening u8 -> usize is lossless; the table covers the full u8 range.
    let index = usize::from(cp437);
    CP437_TO_UTF8[index]
}
/// Convert the provided UTF-8 text to CP-437 bytes.
///
/// Each `char` of the input is encoded separately via [`char_to_cp437`].
/// Characters that are not available are mapped to '?'.
pub fn str_to_cp437(utf8: &str) -> Vec<u8> {
    let mut bytes = Vec::new();
    bytes.extend(utf8.chars().map(char_to_cp437));
    bytes
}
/// Convert a single UTF-8 character to its CP-437 byte.
///
/// The character is looked up in `UTF8_TO_CP437`; when it has no entry there,
/// `MISSING_CHAR_CP437` is returned instead.
pub fn char_to_cp437(utf8: char) -> u8 {
    match UTF8_TO_CP437.get(&utf8) {
        Some(&byte) => byte,
        None => MISSING_CHAR_CP437,
    }
}
}
#[cfg(test)]
@ -219,7 +235,7 @@ mod tests {
#[cfg(test)]
#[cfg(feature = "cp437")]
mod tests_feature_cp437 {
use crate::{CharGrid, Cp437Grid};
use super::*;
#[test]
fn round_trip_cp437() {
@ -228,4 +244,39 @@ mod tests_feature_cp437 {
let actual = CharGrid::from(&cp437);
assert_eq!(actual, utf8);
}
/// Encoding a text to CP-437 and decoding it again must yield the same text.
#[test]
fn convert_str() {
    // test text from https://int10h.org/oldschool-pc-fonts/fontlist/font?ibm_bios
    let original = r#"A quick brown fox jumps over the lazy dog.
0123456789 ¿?¡!`'"., <>()[]{} &@%*^#$\/
* Wieniläinen sioux'ta puhuva ökyzombie diggaa Åsan roquefort-tacoja.
* Ça me fait peur de fêter noël , sur cette île bizarroïde une mère et sa môme essaient de me tuer avec un gâteau à la cigüe brûlé.
* Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich.
* El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, añoraba a su querido cachorro.
.·°·.
$ ¢ £ ¥
dx Σ x²·δx
"#;

    // UTF-8 -> CP-437 -> UTF-8
    let encoded = str_to_cp437(original);
    let decoded = cp437_to_str(&encoded);

    assert_eq!(original, decoded);
}
/// A character with no CP-437 representation must come back as '?'.
#[test]
fn convert_invalid() {
    let encoded = char_to_cp437('😜');
    let decoded = cp437_to_char(encoded);
    assert_eq!(decoded, '?');
}
}

View file

@ -59,7 +59,7 @@ mod command_code;
mod compression;
mod compression_code;
mod connection;
mod cp437;
pub mod cp437;
mod data_ref;
mod grid;
mod origin;