Handle punctuation better when extracting hashtags

This commit is contained in:
Daniel Saxton
2024-12-24 19:14:46 -06:00
parent 7916961bf4
commit bc7a3c8927

View File

@@ -129,11 +129,11 @@ impl NewPost {
fn extract_hashtags(content: &str) -> HashSet<String> {
let mut hashtags = HashSet::new();
for word in content.split_whitespace() {
for word in
content.split(|c: char| c.is_whitespace() || (c.is_ascii_punctuation() && c != '#'))
{
if word.starts_with('#') && word.len() > 1 {
let tag = word[1..]
.trim_end_matches(|c: char| c.is_ascii_punctuation())
.to_lowercase();
let tag = word[1..].to_lowercase();
if !tag.is_empty() {
hashtags.insert(tag);
}
@@ -163,6 +163,9 @@ mod tests {
"#tag1, #tag2, #tag3 with commas",
vec!["tag1", "tag2", "tag3"],
),
("Separated by commas #tag1,#tag2", vec!["tag1", "tag2"]),
("Separated by periods #tag1.#tag2", vec!["tag1", "tag2"]),
("Separated by semicolons #tag1;#tag2", vec!["tag1", "tag2"]),
];
for (input, expected) in test_cases {