From e83e110adb50b539956123d7e324df8d380f2257 Mon Sep 17 00:00:00 2001 From: cr0bar Date: Mon, 10 Jul 2023 21:26:05 +0100 Subject: [PATCH 1/5] Fix to is_boundary to support non-latin characters --- damus-c/cursor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/damus-c/cursor.h b/damus-c/cursor.h index 78c7bc82..a3bd78aa 100644 --- a/damus-c/cursor.h +++ b/damus-c/cursor.h @@ -432,7 +432,7 @@ static inline int is_whitespace(char c) { } static inline int is_boundary(char c) { - return !isalnum(c); + return is_whitespace(c) || ispunct(c); } static inline int is_invalid_url_ending(char c) { From 2353f971146ce1dce2bf4d5ccd3c9687bdfd4310 Mon Sep 17 00:00:00 2001 From: cr0bar Date: Mon, 10 Jul 2023 21:28:10 +0100 Subject: [PATCH 2/5] Change to is_hashtag_char to support non-latin characters --- damus/Models/Mentions.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/damus/Models/Mentions.swift b/damus/Models/Mentions.swift index ba1904fa..9d8ea5a0 100644 --- a/damus/Models/Mentions.swift +++ b/damus/Models/Mentions.swift @@ -415,7 +415,7 @@ func parse_while(_ p: Parser, match: (Character) -> Bool) -> String? { } func is_hashtag_char(_ c: Character) -> Bool { - return c.isLetter || c.isNumber + return (c.isLetter || c.isNumber || c.isASCII) && (!c.isPunctuation && !c.isWhitespace) } func prev_char(_ p: Parser, n: Int) -> Character? { From c1220f50af281ab999ab8d4fafb3d1c036791671 Mon Sep 17 00:00:00 2001 From: cr0bar Date: Mon, 10 Jul 2023 21:29:11 +0100 Subject: [PATCH 3/5] Handle percent encoding of colon for some hashtags --- damus/Nostr/NostrLink.swift | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/damus/Nostr/NostrLink.swift b/damus/Nostr/NostrLink.swift index 5d44bac2..3a15894a 100644 --- a/damus/Nostr/NostrLink.swift +++ b/damus/Nostr/NostrLink.swift @@ -116,8 +116,12 @@ func decode_nostr_uri(_ s: String) -> NostrLink? 
{ return decode_universal_link(s) } - var uri = s.replacingOccurrences(of: "nostr://", with: "") + var uri = s + uri = uri.replacingOccurrences(of: "nostr://", with: "") uri = uri.replacingOccurrences(of: "nostr:", with: "") + + // Fix for non-latin characters resulting in second colon being encoded + uri = uri.replacingOccurrences(of: "damus:t%3A", with: "t:") uri = uri.replacingOccurrences(of: "damus://", with: "") uri = uri.replacingOccurrences(of: "damus:", with: "") From ac2b5b26bbac89bdf193209be13c6d53cc4d9200 Mon Sep 17 00:00:00 2001 From: cr0bar Date: Mon, 10 Jul 2023 22:10:44 +0100 Subject: [PATCH 4/5] Added non-latin test and amended emoji test to include emoji in hashtag --- damusTests/damusTests.swift | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/damusTests/damusTests.swift b/damusTests/damusTests.swift index e0342dfc..5448b0cc 100644 --- a/damusTests/damusTests.swift +++ b/damusTests/damusTests.swift @@ -203,8 +203,27 @@ class damusTests: XCTestCase { XCTAssertNotNil(parsed) XCTAssertEqual(parsed.count, 3) XCTAssertEqual(parsed[0].is_text, "some hashtag ") - XCTAssertEqual(parsed[1].is_hashtag, "bitcoin") - XCTAssertEqual(parsed[2].is_text, "☕️ cool") + XCTAssertEqual(parsed[1].is_hashtag, "bitcoin☕️") + XCTAssertEqual(parsed[2].is_text, " cool") + } + + func testHashtagWithAccents() { + let parsed = parse_mentions(content: "hello from #türkiye", tags: []).blocks + + XCTAssertNotNil(parsed) + XCTAssertEqual(parsed.count, 2) + XCTAssertEqual(parsed[0].is_text, "hello from ") + XCTAssertEqual(parsed[1].is_hashtag, "türkiye") + } + + func testHashtagWithNonLatinCharacters() { + let parsed = parse_mentions(content: "this is a #시험 hope it works", tags: []).blocks + + XCTAssertNotNil(parsed) + XCTAssertEqual(parsed.count, 3) + XCTAssertEqual(parsed[0].is_text, "this is a ") + XCTAssertEqual(parsed[1].is_hashtag, "시험") + XCTAssertEqual(parsed[2].is_text, " hope it works") } func testParseHashtagEnd() { From 
031c7823ae5078de13e96a504750cf69cda7874f Mon Sep 17 00:00:00 2001 From: William Casarin Date: Tue, 11 Jul 2023 07:21:16 -0700 Subject: [PATCH 5/5] refactor: move hashtag tests to their own file --- damus.xcodeproj/project.pbxproj | 8 +++- damusTests/HashtagTests.swift | 79 +++++++++++++++++++++++++++++++++ damusTests/damusTests.swift | 59 ------------------------ 3 files changed, 85 insertions(+), 61 deletions(-) create mode 100644 damusTests/HashtagTests.swift diff --git a/damus.xcodeproj/project.pbxproj b/damus.xcodeproj/project.pbxproj index ab851143..7c0e5061 100644 --- a/damus.xcodeproj/project.pbxproj +++ b/damus.xcodeproj/project.pbxproj @@ -53,6 +53,7 @@ 4C198DF229F88C6B004C165C /* BlurHashDecode.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C198DEE29F88C6B004C165C /* BlurHashDecode.swift */; }; 4C198DF529F88D2E004C165C /* ImageMetadata.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C198DF429F88D2E004C165C /* ImageMetadata.swift */; }; 4C19AE512A5CEF7C00C90DB7 /* NostrScript.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C19AE4C2A5CEF7C00C90DB7 /* NostrScript.swift */; }; + 4C19AE552A5D977400C90DB7 /* HashtagTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C19AE542A5D977400C90DB7 /* HashtagTests.swift */; }; 4C1A9A1A29DCA17E00516EAC /* ReplyCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C1A9A1929DCA17E00516EAC /* ReplyCounter.swift */; }; 4C1A9A1D29DDCF9B00516EAC /* NotificationSettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C1A9A1C29DDCF9B00516EAC /* NotificationSettingsView.swift */; }; 4C1A9A1F29DDD24B00516EAC /* AppearanceSettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C1A9A1E29DDD24B00516EAC /* AppearanceSettingsView.swift */; }; @@ -495,6 +496,7 @@ 4C19AE4B2A5CEF7C00C90DB7 /* primal.ts */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.typescript; path = primal.ts; sourceTree = ""; }; 4C19AE4C2A5CEF7C00C90DB7 /* NostrScript.swift */ = {isa 
= PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = NostrScript.swift; sourceTree = ""; }; 4C19AE502A5CEF7C00C90DB7 /* nostr.ts */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.typescript; path = nostr.ts; sourceTree = ""; }; + 4C19AE542A5D977400C90DB7 /* HashtagTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HashtagTests.swift; sourceTree = ""; }; 4C1A9A1929DCA17E00516EAC /* ReplyCounter.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReplyCounter.swift; sourceTree = ""; }; 4C1A9A1C29DDCF9B00516EAC /* NotificationSettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = NotificationSettingsView.swift; sourceTree = ""; }; 4C1A9A1E29DDD24B00516EAC /* AppearanceSettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppearanceSettingsView.swift; sourceTree = ""; }; @@ -1510,6 +1512,7 @@ 3A5E47C62A4A76C800C0D090 /* TrieTests.swift */, 3A90B1822A4EA3C600000D94 /* UserSearchCacheTests.swift */, 4C4F14A62A2A61A30045A0B9 /* NostrScriptTests.swift */, + 4C19AE542A5D977400C90DB7 /* HashtagTests.swift */, ); path = damusTests; sourceTree = ""; @@ -2114,6 +2117,7 @@ files = ( 5019CADD2A0FB0A9000069E1 /* ProfileDatabaseTests.swift in Sources */, 3A90B1832A4EA3C600000D94 /* UserSearchCacheTests.swift in Sources */, + 4C19AE552A5D977400C90DB7 /* HashtagTests.swift in Sources */, 3A3040ED29A5CB86008A0F29 /* ReplyDescriptionTests.swift in Sources */, 4C8D00D429E3C5D40036AF10 /* NIP19Tests.swift in Sources */, 3A30410129AB12AA008A0F29 /* EventGroupViewTests.swift in Sources */, @@ -2405,7 +2409,7 @@ CLANG_ENABLE_MODULES = YES; CODE_SIGN_ENTITLEMENTS = damus/damus.entitlements; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 1; + CURRENT_PROJECT_VERSION = 2; DEVELOPMENT_ASSET_PATHS = "\"damus/Preview Content\""; DEVELOPMENT_TEAM = XK7H4JAB3D; ENABLE_PREVIEWS = YES; @@ 
-2454,7 +2458,7 @@ CLANG_ENABLE_MODULES = YES; CODE_SIGN_ENTITLEMENTS = damus/damus.entitlements; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 1; + CURRENT_PROJECT_VERSION = 2; DEVELOPMENT_ASSET_PATHS = "\"damus/Preview Content\""; DEVELOPMENT_TEAM = XK7H4JAB3D; ENABLE_PREVIEWS = YES; diff --git a/damusTests/HashtagTests.swift b/damusTests/HashtagTests.swift new file mode 100644 index 00000000..0bb6d511 --- /dev/null +++ b/damusTests/HashtagTests.swift @@ -0,0 +1,79 @@ +// +// HashtagTests.swift +// damusTests +// +// Created by William Casarin on 2023-07-11. +// + +import XCTest +@testable import damus + +final class HashtagTests: XCTestCase { + + override func setUpWithError() throws { + // Put setup code here. This method is called before the invocation of each test method in the class. + } + + override func tearDownWithError() throws { + // Put teardown code here. This method is called after the invocation of each test method in the class. + } + + func testParseHashtag() { + let parsed = parse_mentions(content: "some hashtag #bitcoin derp", tags: []).blocks + + XCTAssertNotNil(parsed) + XCTAssertEqual(parsed.count, 3) + XCTAssertEqual(parsed[0].is_text, "some hashtag ") + XCTAssertEqual(parsed[1].is_hashtag, "bitcoin") + XCTAssertEqual(parsed[2].is_text, " derp") + } + + func testHashtagWithComma() { + let parsed = parse_mentions(content: "some hashtag #bitcoin, cool", tags: []).blocks + + XCTAssertNotNil(parsed) + XCTAssertEqual(parsed.count, 3) + XCTAssertEqual(parsed[0].is_text, "some hashtag ") + XCTAssertEqual(parsed[1].is_hashtag, "bitcoin") + XCTAssertEqual(parsed[2].is_text, ", cool") + } + + func testHashtagWithEmoji() { + let parsed = parse_mentions(content: "some hashtag #bitcoin☕️ cool", tags: []).blocks + + XCTAssertNotNil(parsed) + XCTAssertEqual(parsed.count, 3) + XCTAssertEqual(parsed[0].is_text, "some hashtag ") + XCTAssertEqual(parsed[1].is_hashtag, "bitcoin☕️") + XCTAssertEqual(parsed[2].is_text, " cool") + } + + func 
testHashtagWithAccents() { + let parsed = parse_mentions(content: "hello from #türkiye", tags: []).blocks + + XCTAssertNotNil(parsed) + XCTAssertEqual(parsed.count, 2) + XCTAssertEqual(parsed[0].is_text, "hello from ") + XCTAssertEqual(parsed[1].is_hashtag, "türkiye") + } + + func testHashtagWithNonLatinCharacters() { + let parsed = parse_mentions(content: "this is a #시험 hope it works", tags: []).blocks + + XCTAssertNotNil(parsed) + XCTAssertEqual(parsed.count, 3) + XCTAssertEqual(parsed[0].is_text, "this is a ") + XCTAssertEqual(parsed[1].is_hashtag, "시험") + XCTAssertEqual(parsed[2].is_text, " hope it works") + } + + func testParseHashtagEnd() { + let parsed = parse_mentions(content: "some hashtag #bitcoin", tags: []).blocks + + XCTAssertNotNil(parsed) + XCTAssertEqual(parsed.count, 2) + XCTAssertEqual(parsed[0].is_text, "some hashtag ") + XCTAssertEqual(parsed[1].is_hashtag, "bitcoin") + } + +} diff --git a/damusTests/damusTests.swift b/damusTests/damusTests.swift index 5448b0cc..1e61898b 100644 --- a/damusTests/damusTests.swift +++ b/damusTests/damusTests.swift @@ -176,65 +176,6 @@ class damusTests: XCTestCase { XCTAssertEqual(ev.tags[1][1], "bitcoin") } - - func testParseHashtag() { - let parsed = parse_mentions(content: "some hashtag #bitcoin derp", tags: []).blocks - - XCTAssertNotNil(parsed) - XCTAssertEqual(parsed.count, 3) - XCTAssertEqual(parsed[0].is_text, "some hashtag ") - XCTAssertEqual(parsed[1].is_hashtag, "bitcoin") - XCTAssertEqual(parsed[2].is_text, " derp") - } - - func testHashtagWithComma() { - let parsed = parse_mentions(content: "some hashtag #bitcoin, cool", tags: []).blocks - - XCTAssertNotNil(parsed) - XCTAssertEqual(parsed.count, 3) - XCTAssertEqual(parsed[0].is_text, "some hashtag ") - XCTAssertEqual(parsed[1].is_hashtag, "bitcoin") - XCTAssertEqual(parsed[2].is_text, ", cool") - } - - func testHashtagWithEmoji() { - let parsed = parse_mentions(content: "some hashtag #bitcoin☕️ cool", tags: []).blocks - - XCTAssertNotNil(parsed) - 
XCTAssertEqual(parsed.count, 3) - XCTAssertEqual(parsed[0].is_text, "some hashtag ") - XCTAssertEqual(parsed[1].is_hashtag, "bitcoin☕️") - XCTAssertEqual(parsed[2].is_text, " cool") - } - - func testHashtagWithAccents() { - let parsed = parse_mentions(content: "hello from #türkiye", tags: []).blocks - - XCTAssertNotNil(parsed) - XCTAssertEqual(parsed.count, 2) - XCTAssertEqual(parsed[0].is_text, "hello from ") - XCTAssertEqual(parsed[1].is_hashtag, "türkiye") - } - - func testHashtagWithNonLatinCharacters() { - let parsed = parse_mentions(content: "this is a #시험 hope it works", tags: []).blocks - - XCTAssertNotNil(parsed) - XCTAssertEqual(parsed.count, 3) - XCTAssertEqual(parsed[0].is_text, "this is a ") - XCTAssertEqual(parsed[1].is_hashtag, "시험") - XCTAssertEqual(parsed[2].is_text, " hope it works") - } - - func testParseHashtagEnd() { - let parsed = parse_mentions(content: "some hashtag #bitcoin", tags: []).blocks - - XCTAssertNotNil(parsed) - XCTAssertEqual(parsed.count, 2) - XCTAssertEqual(parsed[0].is_text, "some hashtag ") - XCTAssertEqual(parsed[1].is_hashtag, "bitcoin") - } - func testParseMentionOnlyText() { let parsed = parse_mentions(content: "there is no mention here", tags: [["e", "event_id"]]).blocks