From ed455f7ea406c01b2eca71e9a7f601531004c3e8 Mon Sep 17 00:00:00 2001 From: William Casarin Date: Wed, 22 Jan 2025 13:11:30 -0800 Subject: [PATCH] add tokenator crate also remove a lot of the "advanced" token parsing style which was a bit too verbose for my tastes Signed-off-by: William Casarin --- Cargo.lock | 5 + Cargo.toml | 3 +- crates/notedeck_columns/Cargo.toml | 1 + crates/notedeck_columns/src/accounts/route.rs | 4 +- crates/notedeck_columns/src/route.rs | 3 +- crates/notedeck_columns/src/storage/decks.rs | 1 + crates/notedeck_columns/src/storage/mod.rs | 6 - .../src/storage/token_parser.rs | 512 ------------------ crates/notedeck_columns/src/timeline/kind.rs | 49 +- crates/notedeck_columns/src/timeline/route.rs | 93 ++-- crates/notedeck_columns/src/ui/add_column.rs | 36 +- crates/tokenator/Cargo.toml | 7 + crates/tokenator/README.md | 5 + crates/tokenator/src/lib.rs | 220 ++++++++ 14 files changed, 350 insertions(+), 595 deletions(-) delete mode 100644 crates/notedeck_columns/src/storage/token_parser.rs create mode 100644 crates/tokenator/Cargo.toml create mode 100644 crates/tokenator/README.md create mode 100644 crates/tokenator/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 5ec5ce8f..b2231563 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2828,6 +2828,7 @@ dependencies = [ "strum_macros", "tempfile", "thiserror 2.0.7", + "tokenator", "tokio", "tracing", "tracing-appender", @@ -4527,6 +4528,10 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "tokenator" +version = "0.1.0" + [[package]] name = "tokio" version = "1.42.0" diff --git a/Cargo.toml b/Cargo.toml index c2c18722..fdb7a033 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ members = [ "crates/notedeck_chrome", "crates/notedeck_columns", - "crates/enostr", + "crates/enostr", "crates/tokenator", ] [workspace.dependencies] @@ -34,6 +34,7 @@ nostrdb = { git = "https://github.com/damus-io/nostrdb-rs", rev = "2111948b078b2 notedeck = { path = "crates/notedeck" } notedeck_chrome = { path = "crates/notedeck_chrome" } notedeck_columns = { path = "crates/notedeck_columns" } +tokenator = { path = "crates/tokenator" } open = "5.3.0" poll-promise = { version = "0.3.0", features = ["tokio"] } puffin = { git = "https://github.com/jb55/puffin", package = "puffin", rev = "70ff86d5503815219b01a009afd3669b7903a057" } diff --git a/crates/notedeck_columns/Cargo.toml b/crates/notedeck_columns/Cargo.toml index f641b25e..c6a7de0d 100644 --- a/crates/notedeck_columns/Cargo.toml +++ b/crates/notedeck_columns/Cargo.toml @@ -12,6 +12,7 @@ crate-type = ["lib", "cdylib"] [dependencies] notedeck = { workspace = true } +tokenator = { workspace = true } bitflags = { workspace = true } dirs = { workspace = true } eframe = { workspace = true } diff --git a/crates/notedeck_columns/src/accounts/route.rs b/crates/notedeck_columns/src/accounts/route.rs index 6447fffe..befcfc87 100644 --- a/crates/notedeck_columns/src/accounts/route.rs +++ b/crates/notedeck_columns/src/accounts/route.rs @@ -1,6 +1,6 @@ use super::{AccountLoginResponse, AccountsViewResponse}; -use crate::storage::{ParseError, TokenParser, TokenSerializable, TokenWriter}; use serde::{Deserialize, Serialize}; +use tokenator::{ParseError, TokenParser, TokenSerializable, TokenWriter}; pub enum AccountsRouteResponse { Accounts(AccountsViewResponse), @@ -58,7 +58,7 @@ fn parse_accounts_route<'a>( #[cfg(test)] mod tests { use super::*; - use 
crate::storage::{TokenParser, TokenSerializable, TokenWriter}; + use tokenator::{TokenParser, TokenSerializable, TokenWriter}; #[test] fn test_accounts_route_serialize() { diff --git a/crates/notedeck_columns/src/route.rs b/crates/notedeck_columns/src/route.rs index 471b4456..b8d8fe26 100644 --- a/crates/notedeck_columns/src/route.rs +++ b/crates/notedeck_columns/src/route.rs @@ -4,11 +4,12 @@ use std::fmt::{self}; use crate::{ accounts::AccountsRoute, column::Columns, - storage::{ParseError, TokenParser, TokenSerializable, TokenWriter}, timeline::{kind::ColumnTitle, TimelineId, TimelineRoute}, ui::add_column::{AddAlgoRoute, AddColumnRoute}, }; +use tokenator::{ParseError, TokenParser, TokenSerializable, TokenWriter}; + /// App routing. These describe different places you can go inside Notedeck. #[derive(Clone, Copy, Eq, PartialEq, Debug)] pub enum Route { diff --git a/crates/notedeck_columns/src/storage/decks.rs b/crates/notedeck_columns/src/storage/decks.rs index 8bd1389b..ee819500 100644 --- a/crates/notedeck_columns/src/storage/decks.rs +++ b/crates/notedeck_columns/src/storage/decks.rs @@ -18,6 +18,7 @@ use crate::{ }; use notedeck::{storage, DataPath, DataPathType, Directory}; +use tokenator::{ParseError, TokenParser, TokenSerializable, TokenWriter}; pub static DECKS_CACHE_FILE: &str = "decks_cache.json"; diff --git a/crates/notedeck_columns/src/storage/mod.rs b/crates/notedeck_columns/src/storage/mod.rs index 95c88d7a..cda44eeb 100644 --- a/crates/notedeck_columns/src/storage/mod.rs +++ b/crates/notedeck_columns/src/storage/mod.rs @@ -1,11 +1,5 @@ mod decks; mod migration; -mod token_parser; pub use decks::{load_decks_cache, save_decks_cache, DECKS_CACHE_FILE}; pub use migration::{deserialize_columns, COLUMNS_FILE}; - -pub use token_parser::{ - ParseError, Payload, Token, TokenAlternatives, TokenParser, TokenPayload, TokenSerializable, - TokenWriter, UnexpectedToken, -}; diff --git a/crates/notedeck_columns/src/storage/token_parser.rs b/crates/notedeck_columns/src/storage/token_parser.rs deleted file mode 100644 index 7a855995..00000000 --- a/crates/notedeck_columns/src/storage/token_parser.rs +++ /dev/null @@ -1,512 +0,0 @@ -use crate::timeline::kind::PubkeySource; -use enostr::{NoteId, Pubkey}; - -#[derive(Debug, Clone)] -pub struct UnexpectedToken<'fnd, 'exp> { - pub expected: &'exp str, - pub found: &'fnd str, -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum TokenPayload { - PubkeySource, - Pubkey, - NoteId, -} - -pub struct TokenAlternatives { - /// This is the preferred token. It should be serialized this way - preferred: &'static str, - - /// These are deprecated tokens that should still be handled and parsed - deprecated: &'static [&'static str], -} - -impl TokenAlternatives { - pub const fn new(preferred: &'static str, deprecated: &'static [&'static str]) -> Self { - Self { - preferred, - deprecated, - } - } -} - -/// Token is a unified serialization helper. 
By specifying a list of -/// tokens for each thing you want to parse, you can type-safely parse -/// and serialize things -pub enum Token { - /// A simple identifier - Identifier(&'static str), - - /// There are multiple ways to parse this identifier - Alternatives(TokenAlternatives), - - /// Different payload types, pubkeys etc - Payload(TokenPayload), -} - -#[derive(Debug, Clone)] -pub enum Payload { - PubkeySource(PubkeySource), - Pubkey(Pubkey), - NoteId(NoteId), -} - -impl Payload { - pub fn token_payload(&self) -> TokenPayload { - match self { - Payload::PubkeySource(_) => TokenPayload::PubkeySource, - Payload::Pubkey(_) => TokenPayload::Pubkey, - Payload::NoteId(_) => TokenPayload::NoteId, - } - } - - pub fn parse_note_id(payload: Option) -> Result> { - payload - .and_then(|p| p.get_note_id().cloned()) - .ok_or(ParseError::ExpectedPayload(TokenPayload::NoteId)) - } - - pub fn parse_pubkey(payload: Option) -> Result> { - payload - .and_then(|p| p.get_pubkey().cloned()) - .ok_or(ParseError::ExpectedPayload(TokenPayload::Pubkey)) - } - - pub fn parse_pubkey_source( - payload: Option, - ) -> Result> { - payload - .and_then(|p| p.get_pubkey_source().cloned()) - .ok_or(ParseError::ExpectedPayload(TokenPayload::Pubkey)) - } - - pub fn parse<'a>( - expected: TokenPayload, - parser: &mut TokenParser<'a>, - ) -> Result> { - match expected { - TokenPayload::PubkeySource => Ok(Payload::pubkey_source( - PubkeySource::parse_from_tokens(parser)?, - )), - TokenPayload::Pubkey => { - let pubkey = parser.try_parse(|p| { - let hex = p.pull_token()?; - Pubkey::from_hex(hex).map_err(|_| ParseError::HexDecodeFailed) - })?; - - Ok(Payload::pubkey(pubkey)) - } - TokenPayload::NoteId => { - let note_id = parser.try_parse(|p| { - let hex = p.pull_token()?; - NoteId::from_hex(hex).map_err(|_| ParseError::HexDecodeFailed) - })?; - - Ok(Payload::note_id(note_id)) - } - } - } - - pub fn pubkey(pubkey: Pubkey) -> Self { - Self::Pubkey(pubkey) - } - - pub fn pubkey_source(pubkey_src: PubkeySource) -> Self { - Self::PubkeySource(pubkey_src) - } - - pub fn note_id(note_id: NoteId) -> Self { - Self::NoteId(note_id) - } - - pub fn get_pubkey(&self) -> Option<&Pubkey> { - if let Self::Pubkey(pubkey) = self { - Some(pubkey) - } else { - None - } - } - - pub fn get_pubkey_source(&self) -> Option<&PubkeySource> { - if let Self::PubkeySource(pk_src) = self { - Some(pk_src) - } else { - None - } - } - - pub fn get_note_id(&self) -> Option<&NoteId> { - if let Self::NoteId(note_id) = self { - Some(note_id) - } else { - None - } - } -} - -impl Token { - pub fn parse<'a>( - &self, - parser: &mut TokenParser<'a>, - ) -> Result, ParseError<'a>> { - match self { - Token::Identifier(s) => { - parser.parse_token(s)?; - Ok(None) - } - - Token::Payload(payload) => { - let payload = Payload::parse(*payload, parser)?; - Ok(Some(payload)) - } - - Token::Alternatives(alts) => { - if parser.try_parse(|p| p.parse_token(alts.preferred)).is_ok() { - return Ok(None); - } - - for token in alts.deprecated { - if parser.try_parse(|p| p.parse_token(token)).is_ok() { - return Ok(None); - } - } - - Err(ParseError::AltAllFailed) - } - } - } - - /// Parse all of the tokens in sequence, ensuring that we extract a payload - /// if we find one. This only handles a single payload, if you need more, - /// then use a custom parser - pub fn parse_all<'a>( - parser: &mut TokenParser<'a>, - tokens: &[Token], - ) -> Result, ParseError<'a>> { - parser.try_parse(|p| { - let mut payload: Option = None; - for token in tokens { - if let Some(pl) = token.parse(p)? 
{ - payload = Some(pl); - } - } - - Ok(payload) - }) - } - - pub fn serialize_all(writer: &mut TokenWriter, tokens: &[Token], payload: Option<&Payload>) { - for token in tokens { - token.serialize(writer, payload) - } - } - - pub fn serialize(&self, writer: &mut TokenWriter, payload: Option<&Payload>) { - match self { - Token::Identifier(s) => writer.write_token(s), - Token::Alternatives(alts) => writer.write_token(alts.preferred), - Token::Payload(token_payload) => match token_payload { - TokenPayload::PubkeySource => { - payload - .and_then(|p| p.get_pubkey_source()) - .expect("expected pubkey payload") - .serialize_tokens(writer); - } - - TokenPayload::Pubkey => { - let pubkey = payload - .and_then(|p| p.get_pubkey()) - .expect("expected note_id payload"); - writer.write_token(&hex::encode(pubkey.bytes())); - } - - TokenPayload::NoteId => { - let note_id = payload - .and_then(|p| p.get_note_id()) - .expect("expected note_id payload"); - writer.write_token(&hex::encode(note_id.bytes())); - } - }, - } - } - - pub const fn id(s: &'static str) -> Self { - Token::Identifier(s) - } - - pub const fn alts(primary: &'static str, deprecated: &'static [&'static str]) -> Self { - Token::Alternatives(TokenAlternatives::new(primary, deprecated)) - } - - pub const fn pubkey() -> Self { - Token::Payload(TokenPayload::Pubkey) - } - - pub const fn pubkey_source() -> Self { - Token::Payload(TokenPayload::PubkeySource) - } - - pub const fn note_id() -> Self { - Token::Payload(TokenPayload::NoteId) - } -} - -#[derive(Debug, Clone)] -pub enum ParseError<'a> { - /// Not done parsing yet - Incomplete, - - /// All parsing options failed - AltAllFailed, - - /// There was some issue decoding the data - DecodeFailed, - - /// There was some issue decoding the data - ExpectedPayload(TokenPayload), - - HexDecodeFailed, - - /// We encountered an unexpected token - UnexpectedToken(UnexpectedToken<'a, 'static>), - - /// No more tokens - EOF, -} - -pub struct TokenWriter { - delim: &'static str, - tokens_written: usize, - buf: Vec, -} - -impl Default for TokenWriter { - fn default() -> Self { - Self::new(":") - } -} - -impl TokenWriter { - pub fn new(delim: &'static str) -> Self { - let buf = vec![]; - let tokens_written = 0; - Self { - buf, - tokens_written, - delim, - } - } - - pub fn write_token(&mut self, token: &str) { - if self.tokens_written > 0 { - self.buf.extend_from_slice(self.delim.as_bytes()) - } - self.buf.extend_from_slice(token.as_bytes()); - self.tokens_written += 1; - } - - pub fn str(&self) -> &str { - // SAFETY: only &strs are ever serialized, so its guaranteed to be - // correct here - unsafe { std::str::from_utf8_unchecked(self.buffer()) } - } - - pub fn buffer(&self) -> &[u8] { - &self.buf - } -} - -#[derive(Clone)] -pub struct TokenParser<'a> { - tokens: &'a [&'a str], - index: usize, -} - -fn _parse_pubkey_src_tokens<'a>( - parser: &mut TokenParser<'a>, -) -> Result> { - match parser.pull_token() { - // we handle bare payloads and assume they are explicit pubkey sources - Ok("explicit") => { - let hex_str = parser.pull_token()?; - Pubkey::from_hex(hex_str) - .map_err(|_| ParseError::DecodeFailed) - .map(PubkeySource::Explicit) - } - - Err(ParseError::EOF) | Ok("deck_author") => Ok(PubkeySource::DeckAuthor), - - Ok(hex_payload) => Pubkey::from_hex(hex_payload) - .map_err(|_| ParseError::DecodeFailed) - .map(PubkeySource::Explicit), - - Err(e) => Err(e), - } -} - -impl<'a> TokenParser<'a> { - /// alt tries each parser in `routes` until one succeeds. 
- /// If all fail, returns `ParseError::AltAllFailed`. - #[allow(clippy::type_complexity)] - pub fn alt( - parser: &mut TokenParser<'a>, - routes: &[fn(&mut TokenParser<'a>) -> Result>], - ) -> Result> { - let start = parser.index; - for route in routes { - match route(parser) { - Ok(r) => return Ok(r), // if success, stop trying more routes - Err(_) => { - // revert index & try next route - parser.index = start; - } - } - } - // if we tried them all and none succeeded - Err(ParseError::AltAllFailed) - } - - pub fn new(tokens: &'a [&'a str]) -> Self { - let index = 0; - Self { tokens, index } - } - - pub fn peek_parse_token(&mut self, expected: &'static str) -> Result<&'a str, ParseError<'a>> { - let found = self.peek_token()?; - if found == expected { - Ok(found) - } else { - Err(ParseError::UnexpectedToken(UnexpectedToken { - expected, - found, - })) - } - } - - pub fn parse_token(&mut self, expected: &'static str) -> Result<&'a str, ParseError<'a>> { - let found = self.pull_token()?; - if found == expected { - Ok(found) - } else { - Err(ParseError::UnexpectedToken(UnexpectedToken { - expected, - found, - })) - } - } - - /// “Parse all” meaning: run the provided closure. If it fails, revert - /// the index. - pub fn parse_all( - &mut self, - parse_fn: impl FnOnce(&mut Self) -> Result>, - ) -> Result> { - let start = self.index; - let result = parse_fn(self); - - // If the parser closure fails, revert the index - if result.is_err() { - self.index = start; - result - } else if !self.is_eof() { - Err(ParseError::Incomplete) - } else { - result - } - } - - /// Attempt to parse something, backtrack if we fail. - pub fn try_parse( - &mut self, - parse_fn: impl FnOnce(&mut Self) -> Result>, - ) -> Result> { - let start = self.index; - let result = parse_fn(self); - - // If the parser closure fails, revert the index - if result.is_err() { - self.index = start; - result - } else { - result - } - } - - pub fn pull_token(&mut self) -> Result<&'a str, ParseError<'a>> { - let token = self - .tokens - .get(self.index) - .copied() - .ok_or(ParseError::EOF)?; - self.index += 1; - Ok(token) - } - - pub fn unpop_token(&mut self) { - if (self.index as isize) - 1 < 0 { - return; - } - - self.index -= 1; - } - - pub fn peek_token(&self) -> Result<&'a str, ParseError<'a>> { - self.tokens() - .first() - .ok_or(ParseError::DecodeFailed) - .copied() - } - - #[inline] - pub fn tokens(&self) -> &'a [&'a str] { - let min_index = self.index.min(self.tokens.len()); - &self.tokens[min_index..] - } - - #[inline] - pub fn is_eof(&self) -> bool { - self.tokens().is_empty() - } -} - -pub trait TokenSerializable: Sized { - /// Return a list of serialization plans for a type. 
We do this for - /// type safety and assume constructing these types are lightweight - fn parse_from_tokens<'a>(parser: &mut TokenParser<'a>) -> Result>; - fn serialize_tokens(&self, writer: &mut TokenWriter); -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_column_serialize() { - use crate::ui::add_column::{AddAlgoRoute, AddColumnRoute}; - - { - let data_str = "column:algo_selection:last_per_pubkey"; - let data = &data_str.split(":").collect::>(); - let mut token_writer = TokenWriter::default(); - let mut parser = TokenParser::new(&data); - let parsed = AddColumnRoute::parse_from_tokens(&mut parser).unwrap(); - let expected = AddColumnRoute::Algo(AddAlgoRoute::LastPerPubkey); - parsed.serialize_tokens(&mut token_writer); - assert_eq!(expected, parsed); - assert_eq!(token_writer.str(), data_str); - } - - { - let data_str = "column"; - let mut token_writer = TokenWriter::default(); - let data: &[&str] = &[data_str]; - let mut parser = TokenParser::new(data); - let parsed = AddColumnRoute::parse_from_tokens(&mut parser).unwrap(); - let expected = AddColumnRoute::Base; - parsed.serialize_tokens(&mut token_writer); - assert_eq!(expected, parsed); - assert_eq!(token_writer.str(), data_str); - } - } -} diff --git a/crates/notedeck_columns/src/timeline/kind.rs b/crates/notedeck_columns/src/timeline/kind.rs index 8aa40b85..3b22129d 100644 --- a/crates/notedeck_columns/src/timeline/kind.rs +++ b/crates/notedeck_columns/src/timeline/kind.rs @@ -1,13 +1,11 @@ use crate::error::Error; -use crate::storage::{ - ParseError, Payload, Token, TokenParser, TokenPayload, TokenSerializable, TokenWriter, -}; use crate::timeline::{Timeline, TimelineTab}; use enostr::{Filter, Pubkey}; use nostrdb::{Ndb, Transaction}; use notedeck::{filter::default_limit, FilterError, FilterState, RootNoteIdBuf}; use serde::{Deserialize, Serialize}; use std::{borrow::Cow, fmt::Display}; +use tokenator::{ParseError, TokenParser, TokenSerializable, TokenWriter}; use tracing::{error, warn}; #[derive(Clone, Default, Debug, PartialEq, Eq, Serialize, Deserialize)] @@ -63,7 +61,7 @@ impl TokenSerializable for PubkeySource { let pk = Pubkey::from_hex(hex).map_err(|_| ParseError::HexDecodeFailed)?; Ok(PubkeySource::Explicit(pk)) } else { - Err(ParseError::ExpectedPayload(TokenPayload::Pubkey)) + Err(ParseError::HexDecodeFailed) } } @@ -78,8 +76,6 @@ impl TokenSerializable for PubkeySource { } } -const LIST_CONTACT_TOKENS: &[Token] = &[Token::alts("contacts", &["contact"]), Token::pubkey()]; - impl ListKind { pub fn contact_list(pk_src: PubkeySource) -> Self { ListKind::Contact(pk_src) @@ -90,38 +86,39 @@ impl ListKind { ListKind::Contact(pk_src) => Some(pk_src), } } - - fn payload(&self) -> Option { - match self { - ListKind::Contact(pk_src) => Some(Payload::pubkey_source(pk_src.clone())), - } - } - - const fn tokens(&self) -> &'static [Token] { - match self { - ListKind::Contact(_pubkey) => LIST_CONTACT_TOKENS, - } - } } impl TokenSerializable for ListKind { fn serialize_tokens(&self, writer: &mut TokenWriter) { - Token::serialize_all(writer, self.tokens(), self.payload().as_ref()); + match self { + ListKind::Contact(pk_src) => { + writer.write_token("contact"); + pk_src.serialize_tokens(writer); + } + } } fn parse_from_tokens<'a>(parser: &mut TokenParser<'a>) -> Result> { + parser.parse_all(|p| { + p.parse_token("contact")?; + let pk_src = PubkeySource::parse_from_tokens(p)?; + Ok(ListKind::Contact(pk_src)) + }) + + /* here for u when you need more things to parse TokenParser::alt( parser, &[|p| { - let maybe_payload 
= - Token::parse_all(p, ListKind::Contact(PubkeySource::default()).tokens())?; - let payload = maybe_payload - .as_ref() - .and_then(|mp| mp.get_pubkey_source()) - .ok_or(ParseError::ExpectedPayload(TokenPayload::Pubkey))?; - Ok(ListKind::Contact(payload.to_owned())) + p.parse_all(|p| { + p.parse_token("contact")?; + let pk_src = PubkeySource::parse_from_tokens(p)?; + Ok(ListKind::Contact(pk_src)) + }); + },|p| { + // more cases... }], ) + */ } } diff --git a/crates/notedeck_columns/src/timeline/route.rs b/crates/notedeck_columns/src/timeline/route.rs index 7a0effc4..89ff5ee8 100644 --- a/crates/notedeck_columns/src/timeline/route.rs +++ b/crates/notedeck_columns/src/timeline/route.rs @@ -3,7 +3,6 @@ use crate::{ draft::Drafts, nav::RenderNavAction, profile::ProfileAction, - storage::{ParseError, Payload, Token, TokenParser, TokenSerializable, TokenWriter}, timeline::{TimelineCache, TimelineId, TimelineKind}, ui::{ self, @@ -12,6 +11,8 @@ use crate::{ }, }; +use tokenator::{ParseError, TokenParser, TokenSerializable, TokenWriter}; + use enostr::{NoteId, Pubkey}; use nostrdb::{Ndb, Transaction}; use notedeck::{Accounts, ImageCache, MuteFun, NoteCache, UnknownIds}; @@ -25,61 +26,61 @@ pub enum TimelineRoute { Quote(NoteId), } -const PROFILE_TOKENS: &[Token] = &[Token::id("profile"), Token::pubkey()]; -const THREAD_TOKENS: &[Token] = &[Token::id("thread"), Token::note_id()]; -const REPLY_TOKENS: &[Token] = &[Token::id("reply"), Token::note_id()]; -const QUOTE_TOKENS: &[Token] = &[Token::id("quote"), Token::note_id()]; +fn parse_pubkey<'a>(parser: &mut TokenParser<'a>) -> Result> { + let hex = parser.pull_token()?; + Pubkey::from_hex(hex).map_err(|_| ParseError::HexDecodeFailed) +} -impl TimelineRoute { - fn payload(&self) -> Option { - match self { - TimelineRoute::Profile(pk) => Some(Payload::pubkey(*pk)), - TimelineRoute::Thread(note_id) => Some(Payload::note_id(*note_id)), - TimelineRoute::Reply(note_id) => Some(Payload::note_id(*note_id)), - TimelineRoute::Quote(note_id) => Some(Payload::note_id(*note_id)), - TimelineRoute::Timeline(_timeline_id) => todo!("handle timeline_ids"), - } - } - - fn tokens(&self) -> &'static [Token] { - match self { - TimelineRoute::Profile(_) => PROFILE_TOKENS, - TimelineRoute::Thread(_) => THREAD_TOKENS, - TimelineRoute::Reply(_) => REPLY_TOKENS, - TimelineRoute::Quote(_) => QUOTE_TOKENS, - TimelineRoute::Timeline(_) => todo!("handle timeline_ids"), - } - } - - /// NOTE!! 
update parse_from_tokens as well when adding to this match - fn parse<'a>(&self, parser: &mut TokenParser<'a>) -> Result> { - let payload = Token::parse_all(parser, self.tokens())?; - - match self { - TimelineRoute::Profile(_) => { - Ok(TimelineRoute::Profile(Payload::parse_pubkey(payload)?)) - } - TimelineRoute::Thread(_) => Ok(TimelineRoute::Thread(Payload::parse_note_id(payload)?)), - TimelineRoute::Reply(_) => Ok(TimelineRoute::Reply(Payload::parse_note_id(payload)?)), - TimelineRoute::Quote(_) => Ok(TimelineRoute::Quote(Payload::parse_note_id(payload)?)), - TimelineRoute::Timeline(_) => todo!("handle timeline parsing"), - } - } +fn parse_note_id<'a>(parser: &mut TokenParser<'a>) -> Result> { + let hex = parser.pull_token()?; + NoteId::from_hex(hex).map_err(|_| ParseError::HexDecodeFailed) } impl TokenSerializable for TimelineRoute { fn serialize_tokens(&self, writer: &mut TokenWriter) { - Token::serialize_all(writer, self.tokens(), self.payload().as_ref()); + match self { + TimelineRoute::Profile(pk) => { + writer.write_token("profile"); + writer.write_token(&pk.hex()); + } + TimelineRoute::Thread(note_id) => { + writer.write_token("thread"); + writer.write_token(¬e_id.hex()); + } + TimelineRoute::Reply(note_id) => { + writer.write_token("reply"); + writer.write_token(¬e_id.hex()); + } + TimelineRoute::Quote(note_id) => { + writer.write_token("quote"); + writer.write_token(¬e_id.hex()); + } + TimelineRoute::Timeline(_tlid) => { + todo!("tlid") + } + } } fn parse_from_tokens<'a>(parser: &mut TokenParser<'a>) -> Result> { TokenParser::alt( parser, &[ - |p| TimelineRoute::Profile(Pubkey::new([0; 32])).parse(p), - |p| TimelineRoute::Thread(NoteId::new([0; 32])).parse(p), - |p| TimelineRoute::Reply(NoteId::new([0; 32])).parse(p), - |p| TimelineRoute::Quote(NoteId::new([0; 32])).parse(p), + |p| { + p.parse_token("profile")?; + Ok(TimelineRoute::Profile(parse_pubkey(p)?)) + }, + |p| { + p.parse_token("thread")?; + Ok(TimelineRoute::Thread(parse_note_id(p)?)) + }, + |p| { + p.parse_token("reply")?; + Ok(TimelineRoute::Reply(parse_note_id(p)?)) + }, + |p| { + p.parse_token("quote")?; + Ok(TimelineRoute::Quote(parse_note_id(p)?)) + }, |_p| todo!("handle timeline parsing"), ], ) @@ -258,8 +259,8 @@ pub fn render_profile_route( #[cfg(test)] mod tests { - use crate::storage::{TokenParser, TokenSerializable, TokenWriter}; use enostr::NoteId; + use tokenator::{TokenParser, TokenSerializable, TokenWriter}; #[test] fn test_timeline_route_serialize() { diff --git a/crates/notedeck_columns/src/ui/add_column.rs b/crates/notedeck_columns/src/ui/add_column.rs index cc8610f0..aeed68fb 100644 --- a/crates/notedeck_columns/src/ui/add_column.rs +++ b/crates/notedeck_columns/src/ui/add_column.rs @@ -11,13 +11,13 @@ use nostrdb::{Ndb, Transaction}; use crate::{ login_manager::AcquireKeyState, route::Route, - storage::{ParseError, TokenParser, TokenSerializable, TokenWriter}, timeline::{kind::ListKind, PubkeySource, Timeline, TimelineKind}, ui::anim::ICON_EXPANSION_MULTIPLE, Damus, }; use notedeck::{AppContext, ImageCache, NotedeckTextStyle, UserAccount}; +use tokenator::{ParseError, TokenParser, TokenSerializable, TokenWriter}; use super::{anim::AnimationHelper, padding, ProfilePreview}; @@ -765,3 +765,37 @@ pub fn hashtag_ui( }) .inner } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_column_serialize() { + use super::{AddAlgoRoute, AddColumnRoute}; + + { + let data_str = "column:algo_selection:last_per_pubkey"; + let data = &data_str.split(":").collect::>(); + let mut token_writer = 
TokenWriter::default(); + let mut parser = TokenParser::new(&data); + let parsed = AddColumnRoute::parse_from_tokens(&mut parser).unwrap(); + let expected = AddColumnRoute::Algo(AddAlgoRoute::LastPerPubkey); + parsed.serialize_tokens(&mut token_writer); + assert_eq!(expected, parsed); + assert_eq!(token_writer.str(), data_str); + } + + { + let data_str = "column"; + let mut token_writer = TokenWriter::default(); + let data: &[&str] = &[data_str]; + let mut parser = TokenParser::new(data); + let parsed = AddColumnRoute::parse_from_tokens(&mut parser).unwrap(); + let expected = AddColumnRoute::Base; + parsed.serialize_tokens(&mut token_writer); + assert_eq!(expected, parsed); + assert_eq!(token_writer.str(), data_str); + } + } +} diff --git a/crates/tokenator/Cargo.toml b/crates/tokenator/Cargo.toml new file mode 100644 index 00000000..38a4d16f --- /dev/null +++ b/crates/tokenator/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "tokenator" +version = "0.1.0" +edition = "2021" +description = "A simple library for parsing a serializing string tokens" + +[dependencies] diff --git a/crates/tokenator/README.md b/crates/tokenator/README.md new file mode 100644 index 00000000..0fc537d5 --- /dev/null +++ b/crates/tokenator/README.md @@ -0,0 +1,5 @@ + +# tokenator + +Tokenator is a simple string token parser and serializer. + diff --git a/crates/tokenator/src/lib.rs b/crates/tokenator/src/lib.rs new file mode 100644 index 00000000..0206b69a --- /dev/null +++ b/crates/tokenator/src/lib.rs @@ -0,0 +1,220 @@ +#[derive(Debug, Clone)] +pub struct UnexpectedToken<'fnd, 'exp> { + pub expected: &'exp str, + pub found: &'fnd str, +} + +#[derive(Debug, Clone)] +pub enum ParseError<'a> { + /// Not done parsing yet + Incomplete, + + /// All parsing options failed + AltAllFailed, + + /// There was some issue decoding the data + DecodeFailed, + + HexDecodeFailed, + + /// We encountered an unexpected token + UnexpectedToken(UnexpectedToken<'a, 'static>), + + /// No more tokens + EOF, +} + +pub struct TokenWriter { + delim: &'static str, + tokens_written: usize, + buf: Vec, +} + +impl Default for TokenWriter { + fn default() -> Self { + Self::new(":") + } +} + +impl TokenWriter { + pub fn new(delim: &'static str) -> Self { + let buf = vec![]; + let tokens_written = 0; + Self { + buf, + tokens_written, + delim, + } + } + + pub fn write_token(&mut self, token: &str) { + if self.tokens_written > 0 { + self.buf.extend_from_slice(self.delim.as_bytes()) + } + self.buf.extend_from_slice(token.as_bytes()); + self.tokens_written += 1; + } + + pub fn str(&self) -> &str { + // SAFETY: only &strs are ever serialized, so its guaranteed to be + // correct here + unsafe { std::str::from_utf8_unchecked(self.buffer()) } + } + + pub fn buffer(&self) -> &[u8] { + &self.buf + } +} + +#[derive(Clone)] +pub struct TokenParser<'a> { + tokens: &'a [&'a str], + index: usize, +} + +impl<'a> TokenParser<'a> { + /// alt tries each parser in `routes` until one succeeds. + /// If all fail, returns `ParseError::AltAllFailed`. 
+ #[allow(clippy::type_complexity)] + pub fn alt( + parser: &mut TokenParser<'a>, + routes: &[fn(&mut TokenParser<'a>) -> Result>], + ) -> Result> { + let start = parser.index; + for route in routes { + match route(parser) { + Ok(r) => return Ok(r), // if success, stop trying more routes + Err(_) => { + // revert index & try next route + parser.index = start; + } + } + } + // if we tried them all and none succeeded + Err(ParseError::AltAllFailed) + } + + pub fn new(tokens: &'a [&'a str]) -> Self { + let index = 0; + Self { tokens, index } + } + + pub fn peek_parse_token(&mut self, expected: &'static str) -> Result<&'a str, ParseError<'a>> { + let found = self.peek_token()?; + if found == expected { + Ok(found) + } else { + Err(ParseError::UnexpectedToken(UnexpectedToken { + expected, + found, + })) + } + } + + /// Parse a list of alternative tokens, returning success if any match. + pub fn parse_any_token( + &mut self, + expected: &[&'static str], + ) -> Result<&'a str, ParseError<'a>> { + for token in expected { + let result = self.try_parse(|p| p.parse_token(token)); + if result.is_ok() { + return result; + } + } + + Err(ParseError::AltAllFailed) + } + + pub fn parse_token(&mut self, expected: &'static str) -> Result<&'a str, ParseError<'a>> { + let found = self.pull_token()?; + if found == expected { + Ok(found) + } else { + Err(ParseError::UnexpectedToken(UnexpectedToken { + expected, + found, + })) + } + } + + /// Ensure that we have parsed all tokens. If not the parser backtracks + /// and the parse does not succeed, returning [`ParseError::Incomplete`]. + pub fn parse_all( + &mut self, + parse_fn: impl FnOnce(&mut Self) -> Result>, + ) -> Result> { + let start = self.index; + let result = parse_fn(self); + + // If the parser closure fails, revert the index + if result.is_err() { + self.index = start; + result + } else if !self.is_eof() { + Err(ParseError::Incomplete) + } else { + result + } + } + + /// Attempt to parse something, backtrack if we fail. + pub fn try_parse( + &mut self, + parse_fn: impl FnOnce(&mut Self) -> Result>, + ) -> Result> { + let start = self.index; + let result = parse_fn(self); + + // If the parser closure fails, revert the index + if result.is_err() { + self.index = start; + result + } else { + result + } + } + + pub fn pull_token(&mut self) -> Result<&'a str, ParseError<'a>> { + let token = self + .tokens + .get(self.index) + .copied() + .ok_or(ParseError::EOF)?; + self.index += 1; + Ok(token) + } + + pub fn unpop_token(&mut self) { + if (self.index as isize) - 1 < 0 { + return; + } + + self.index -= 1; + } + + pub fn peek_token(&self) -> Result<&'a str, ParseError<'a>> { + self.tokens() + .first() + .ok_or(ParseError::DecodeFailed) + .copied() + } + + #[inline] + pub fn tokens(&self) -> &'a [&'a str] { + let min_index = self.index.min(self.tokens.len()); + &self.tokens[min_index..] + } + + #[inline] + pub fn is_eof(&self) -> bool { + self.tokens().is_empty() + } +} + +pub trait TokenSerializable: Sized { + /// Return a list of serialization plans for a type. We do this for + /// type safety and assume constructing these types are lightweight + fn parse_from_tokens<'a>(parser: &mut TokenParser<'a>) -> Result>; + fn serialize_tokens(&self, writer: &mut TokenWriter); +}
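
For anyone poking at the new crate from the outside, here is a minimal round-trip sketch against the API added in crates/tokenator/src/lib.rs. `DemoRoute` and the `main` harness are illustrative assumptions for this note only and do not appear in the patch; the pattern mirrors what the columns code now does: `TokenParser::alt` to try each route, `parse_token` for identifiers, `pull_token` for payloads, and `TokenWriter` to serialize back out.

use tokenator::{ParseError, TokenParser, TokenSerializable, TokenWriter};

// Hypothetical route type used only for illustration; not part of the patch.
#[derive(Debug, PartialEq)]
enum DemoRoute {
    Home,
    Profile(String), // hex pubkey kept as a plain string to stay self-contained
}

impl TokenSerializable for DemoRoute {
    fn parse_from_tokens<'a>(parser: &mut TokenParser<'a>) -> Result<Self, ParseError<'a>> {
        // Try each alternative; alt backtracks the index on failure and
        // returns ParseError::AltAllFailed if nothing matches.
        TokenParser::alt(
            parser,
            &[
                |p| {
                    p.parse_token("profile")?;
                    Ok(DemoRoute::Profile(p.pull_token()?.to_string()))
                },
                |p| {
                    p.parse_token("home")?;
                    Ok(DemoRoute::Home)
                },
            ],
        )
    }

    fn serialize_tokens(&self, writer: &mut TokenWriter) {
        match self {
            DemoRoute::Home => writer.write_token("home"),
            DemoRoute::Profile(hex) => {
                writer.write_token("profile");
                writer.write_token(hex);
            }
        }
    }
}

fn main() {
    // Round-trip: "profile:abcd" -> DemoRoute::Profile("abcd") -> "profile:abcd"
    let serialized = "profile:abcd";
    let tokens: Vec<&str> = serialized.split(':').collect();
    let mut parser = TokenParser::new(&tokens);
    let parsed = DemoRoute::parse_from_tokens(&mut parser).unwrap();
    assert_eq!(parsed, DemoRoute::Profile("abcd".to_string()));

    // Serialize back with the default ":" delimiter and check it matches.
    let mut writer = TokenWriter::default();
    parsed.serialize_tokens(&mut writer);
    assert_eq!(writer.str(), serialized);
}

Same shape as the AddColumnRoute test in the patch: split the string on ":", parse from the token slice, serialize with the default ":" delimiter, and assert the string round-trips. When a type is expected to consume the whole token stream, wrap the parse in TokenParser::parse_all, which backtracks and reports ParseError::Incomplete if tokens are left over.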