Compare commits

...

1 Commits

8 changed files with 161 additions and 25 deletions

View File

@@ -25,6 +25,8 @@
3A8CC6CC2A2CFEF900940F5F /* StringUtil.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3A8CC6CB2A2CFEF900940F5F /* StringUtil.swift */; };
3A90B1812A4EA3AF00000D94 /* UserSearchCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3A90B1802A4EA3AF00000D94 /* UserSearchCache.swift */; };
3A90B1832A4EA3C600000D94 /* UserSearchCacheTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3A90B1822A4EA3C600000D94 /* UserSearchCacheTests.swift */; };
3A9ADA302B4CB5F400756AA0 /* TranslatorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3A9ADA2F2B4CB5F400756AA0 /* TranslatorTests.swift */; };
3A9ADA322B4CBFD000756AA0 /* BlocksExtensionTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3A9ADA312B4CBFD000756AA0 /* BlocksExtensionTests.swift */; };
3AA247FD297E3CFF0090C62D /* RepostsModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3AA247FC297E3CFF0090C62D /* RepostsModel.swift */; };
3AA247FF297E3D900090C62D /* RepostsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3AA247FE297E3D900090C62D /* RepostsView.swift */; };
3AA24802297E3DC20090C62D /* RepostView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3AA24801297E3DC20090C62D /* RepostView.swift */; };
@@ -260,8 +262,8 @@
4C9B0DF32A65C46800CBDA21 /* ProfileEditButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C9B0DF22A65C46800CBDA21 /* ProfileEditButton.swift */; };
4C9BB83129C0ED4F00FC4E37 /* DisplayName.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C9BB83029C0ED4F00FC4E37 /* DisplayName.swift */; };
4C9BB83429C12D9900FC4E37 /* EventProfileName.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C9BB83329C12D9900FC4E37 /* EventProfileName.swift */; };
4C9D6D1B2B1D35D7004E5CD9 /* PullDownSearch.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C9D6D1A2B1D35D7004E5CD9 /* PullDownSearch.swift */; };
4C9D6D162B1AA9C6004E5CD9 /* DisplayTabBarNotify.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C9D6D152B1AA9C6004E5CD9 /* DisplayTabBarNotify.swift */; };
4C9D6D1B2B1D35D7004E5CD9 /* PullDownSearch.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C9D6D1A2B1D35D7004E5CD9 /* PullDownSearch.swift */; };
4C9F18E229AA9B6C008C55EC /* CustomizeZapView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C9F18E129AA9B6C008C55EC /* CustomizeZapView.swift */; };
4C9F18E429ABDE6D008C55EC /* MaybeAnonPfpView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C9F18E329ABDE6D008C55EC /* MaybeAnonPfpView.swift */; };
4CA2EFA0280E37AC0044ACD8 /* TimelineView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4CA2EF9F280E37AC0044ACD8 /* TimelineView.swift */; };
@@ -669,6 +671,8 @@
3A96D41A298DA94500388A2A /* nl */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = nl; path = nl.lproj/InfoPlist.strings; sourceTree = "<group>"; };
3A96D41B298DA94500388A2A /* nl */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = nl; path = nl.lproj/Localizable.strings; sourceTree = "<group>"; };
3A96D41C298DA94500388A2A /* nl */ = {isa = PBXFileReference; lastKnownFileType = text.plist.stringsdict; name = nl; path = nl.lproj/Localizable.stringsdict; sourceTree = "<group>"; };
3A9ADA2F2B4CB5F400756AA0 /* TranslatorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TranslatorTests.swift; sourceTree = "<group>"; };
3A9ADA312B4CBFD000756AA0 /* BlocksExtensionTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BlocksExtensionTests.swift; sourceTree = "<group>"; };
3AA247FC297E3CFF0090C62D /* RepostsModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = RepostsModel.swift; sourceTree = "<group>"; };
3AA247FE297E3D900090C62D /* RepostsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RepostsView.swift; sourceTree = "<group>"; };
3AA24801297E3DC20090C62D /* RepostView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RepostView.swift; sourceTree = "<group>"; };
@@ -1070,8 +1074,8 @@
4C9B0DF22A65C46800CBDA21 /* ProfileEditButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProfileEditButton.swift; sourceTree = "<group>"; };
4C9BB83029C0ED4F00FC4E37 /* DisplayName.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DisplayName.swift; sourceTree = "<group>"; };
4C9BB83329C12D9900FC4E37 /* EventProfileName.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EventProfileName.swift; sourceTree = "<group>"; };
4C9D6D1A2B1D35D7004E5CD9 /* PullDownSearch.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PullDownSearch.swift; sourceTree = "<group>"; };
4C9D6D152B1AA9C6004E5CD9 /* DisplayTabBarNotify.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DisplayTabBarNotify.swift; sourceTree = "<group>"; };
4C9D6D1A2B1D35D7004E5CD9 /* PullDownSearch.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PullDownSearch.swift; sourceTree = "<group>"; };
4C9F18E129AA9B6C008C55EC /* CustomizeZapView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CustomizeZapView.swift; sourceTree = "<group>"; };
4C9F18E329ABDE6D008C55EC /* MaybeAnonPfpView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MaybeAnonPfpView.swift; sourceTree = "<group>"; };
4CA2EF9F280E37AC0044ACD8 /* TimelineView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TimelineView.swift; sourceTree = "<group>"; };
@@ -2052,6 +2056,7 @@
children = (
4C9B0DED2A65A75F00CBDA21 /* AttrStringTestExtensions.swift */,
B5B4D1422B37D47600844320 /* NdbExtensions.swift */,
3A9ADA2F2B4CB5F400756AA0 /* TranslatorTests.swift */,
);
path = Util;
sourceTree = "<group>";
@@ -2367,6 +2372,7 @@
D72A2CFF2AD9B66B002AFF62 /* EventViewTests.swift */,
D7315A2B2ACDF4DA0036E30A /* DamusCacheManagerTests.swift */,
B501062C2B363036003874F5 /* AuthIntegrationTests.swift */,
3A9ADA312B4CBFD000756AA0 /* BlocksExtensionTests.swift */,
);
path = damusTests;
sourceTree = "<group>";
@@ -3245,6 +3251,7 @@
D78525252A7B2EA4002FA637 /* NoteContentViewTests.swift in Sources */,
4C3EA67B28FF7B3900C48A62 /* InvoiceTests.swift in Sources */,
4C363A9E2828A822006E126D /* ReplyTests.swift in Sources */,
3A9ADA322B4CBFD000756AA0 /* BlocksExtensionTests.swift in Sources */,
4C7D097E2A0C58B900943473 /* WalletConnectTests.swift in Sources */,
4CB883AA297612FF00DC99E7 /* ZapTests.swift in Sources */,
D72A2D022AD9C136002AFF62 /* EventViewTests.swift in Sources */,
@@ -3264,6 +3271,7 @@
3A3040F329A91366008A0F29 /* ProfileViewTests.swift in Sources */,
4CF0ABDC2981A19E00D66079 /* ListTests.swift in Sources */,
4C684A552A7E91FE005E6031 /* LongPostTests.swift in Sources */,
3A9ADA302B4CB5F400756AA0 /* TranslatorTests.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

File diff suppressed because one or more lines are too long

View File

@@ -6,7 +6,7 @@
//
import Foundation
import NaturalLanguage
fileprivate extension String {
/// Failable initializer to build a Swift.String from a C-backed `str_block_t`.
@@ -218,3 +218,51 @@ extension Block {
}
}
}
extension Blocks {
    /// Returns a language hypothesis represented as an ``NLLanguage`` (determined by ``NLLanguageRecognizer``),
    /// which is the most likely language detected using the combination of blocks.
    ///
    /// Only the `.text` blocks are considered; they are joined with single spaces before recognition.
    ///
    /// - Returns: The top hypothesis when its confidence is at least 50%, or `nil` when there is no text
    ///   to analyze or no hypothesis reaches that threshold.
    var languageHypothesis: NLLanguage? {
        // Spelled as a Unicode escape so the character cannot be silently lost to an encoding problem,
        // which would turn the replacement below into a no-op (searching for an empty string).
        let rightSingleQuotationMark = "\u{2019}"
        let asciiApostrophe = "'"

        // Rely on Apple's NLLanguageRecognizer to tell us which language it thinks the blocks are in
        // and filter on only the text portions of the content as URLs, hashtags, and anything else confuse the language recognizer.
        let originalOnlyText = blocks.compactMap { block -> String? in
            guard case .text(let txt) = block else {
                return nil
            }

            // Replacing right single quotation marks (’) with "typewriter or ASCII apostrophes" (')
            // as a workaround to get Apple's language recognizer to predict the language correctly.
            // It is important to add this workaround to get the language right because it wastes users' money to send translation requests.
            // Until Apple fixes their language model, this workaround will be kept in place.
            // See https://en.wikipedia.org/wiki/Apostrophe#Unicode for an explanation of the differences between the two characters.
            //
            // For example,
            // "nevent1qqs0wsknetaju06xk39cv8sttd064amkykqalvfue7ydtg3p0lyfksqzyrhxagf6h8l9cjngatumrg60uq22v66qz979pm32v985ek54ndh8gj42wtp"
            // has the note content "It’s a meme".
            // Without the character replacement, it is 61% confident that the text is in Turkish (tr) and 8% confident that the text is in English (en),
            // which is a wildly incorrect hypothesis.
            // With the character replacement, it is 65% confident that the text is in English (en) and 24% confident that the text is in Turkish (tr), which is more accurate.
            //
            // Similarly,
            // "nevent1qqspjqlln6wvxrqg6kzl2p7gk0rgr5stc7zz5sstl34cxlw55gvtylgpp4mhxue69uhkummn9ekx7mqpr4mhxue69uhkummnw3ez6ur4vgh8wetvd3hhyer9wghxuet5qy28wumn8ghj7un9d3shjtnwdaehgu3wvfnsygpx6655ve67vqlcme9ld7ww73pqx7msclhwzu8lqmkhvuluxnyc7yhf3xut"
            // has the note content "You’re funner".
            // Without the character replacement, it is 52% confident that the text is in Norwegian Bokmål (nb) and 41% confident that the text is in English (en).
            // With the character replacement, it is 93% confident that the text is in English (en) and 4% confident that the text is in Norwegian Bokmål (nb).
            return txt.replacingOccurrences(of: rightSingleQuotationMark, with: asciiApostrophe)
        }
        .joined(separator: " ")

        // If there is no text, there's nothing to use to detect language.
        guard !originalOnlyText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
            return nil
        }

        let languageRecognizer = NLLanguageRecognizer()
        languageRecognizer.processString(originalOnlyText)

        // Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
        return languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key
    }
}

View File

@@ -22,7 +22,17 @@ public struct Translator {
self.purple = purple
}
/// Decides whether a translation attempt is worthwhile.
///
/// - Parameters:
///   - sourceLanguage: Language code of the text to translate.
///   - targetLanguage: Language code the text would be translated into.
/// - Returns: `false` when both language codes are equal; otherwise whatever the user settings
///   report through `can_translate`.
func shouldTranslate(from sourceLanguage: String, to targetLanguage: String) -> Bool {
    // Translating a language into itself is pointless, so bail out before consulting the settings.
    guard sourceLanguage != targetLanguage else {
        return false
    }
    return userSettingsStore.can_translate
}
public func translate(_ text: String, from sourceLanguage: String, to targetLanguage: String) async throws -> String? {
// Do not attempt to translate if the source and target languages are the same.
guard shouldTranslate(from: sourceLanguage, to: targetLanguage) else {
return nil
}
switch userSettingsStore.translation_service {
case .purple:
return try await translateWithPurple(text, from: sourceLanguage, to: targetLanguage)
@@ -35,7 +45,7 @@ public struct Translator {
case .deepl:
return try await translateWithDeepL(text, from: sourceLanguage, to: targetLanguage)
case .none:
return text
return nil
}
}

View File

@@ -0,0 +1,26 @@
//
// BlocksExtensionTests.swift
// damusTests
//
// Created by Terry Yiu on 1/8/24.
//
import XCTest
import NaturalLanguage
@testable import damus
final class BlocksExtensionTests: XCTestCase {

    /// An English note fixture containing a right single quotation mark must be detected as English.
    func testLanguageHypothesisIsCorrectWithRightSingleQuotationMark() throws {
        let parsedNote = try XCTUnwrap(
            NdbNote.owned_from_json(json: test_english_text_note_with_right_single_quotation_mark)
        )
        let detectedLanguage = parsedNote.blocks(test_keypair).languageHypothesis

        XCTAssertEqual(detectedLanguage, NLLanguage.english)
    }

    /// A Japanese note fixture must be detected as Japanese.
    func testLanguageHypothesisIsCorrectWithNonEnglishLocale() throws {
        let parsedNote = try XCTUnwrap(
            NdbNote.owned_from_json(json: test_japanese_text_note)
        )
        let detectedLanguage = parsedNote.blocks(test_keypair).languageHypothesis

        XCTAssertEqual(detectedLanguage, NLLanguage.japanese)
    }
}

View File

@@ -0,0 +1,40 @@
//
// TranslatorTests.swift
// damusTests
//
// Created by Terry Yiu on 1/8/24.
//
import XCTest
@testable import damus
final class TranslatorTests: XCTestCase {

    /// With a translation service selected, differing languages should be translated.
    func testShouldTranslateWhenLanguagesAreDifferent() throws {
        let settings = UserSettingsStore()
        settings.translation_service = .purple
        let purple = DamusPurple(environment: .local_test, keypair: test_keypair)
        let sut = Translator(settings, purple: purple)

        for (source, target) in [("en", "es"), ("es", "fr")] {
            XCTAssertTrue(sut.shouldTranslate(from: source, to: target))
        }
    }

    /// Identical source and target languages should never be translated.
    func testShouldNotTranslateWhenLanguagesAreTheSame() throws {
        let settings = UserSettingsStore()
        settings.translation_service = .purple
        let purple = DamusPurple(environment: .local_test, keypair: test_keypair)
        let sut = Translator(settings, purple: purple)

        for language in ["en", "es"] {
            XCTAssertFalse(sut.shouldTranslate(from: language, to: language))
        }
    }

    /// Without a translation service selected, nothing should be translated.
    func testShouldNotTranslateWhenNoTranslationServiceSelected() throws {
        let settings = UserSettingsStore()
        settings.translation_service = .none
        let purple = DamusPurple(environment: .local_test, keypair: test_keypair)
        let sut = Translator(settings, purple: purple)

        for (source, target) in [("en", "es"), ("es", "fr")] {
            XCTAssertFalse(sut.shouldTranslate(from: source, to: target))
        }
    }
}

View File

@@ -406,31 +406,13 @@ extension NdbNote {
/// Determines the language of this note's content.
///
/// Must be called off the main thread; this is enforced with an `assert`.
///
/// - Parameter keypair: Passed to `blocks(_:)` to obtain the note's blocks.
/// - Returns: The result of `localeToLanguage` applied to the detected language hypothesis,
///   or `nil` when no hypothesis is available.
func note_language(_ keypair: Keypair) -> String? {
assert(!Thread.isMainThread, "This function must not be run on the main thread.")
// Rely on Apple's NLLanguageRecognizer to tell us which language it thinks the note is in
// and filter on only the text portions of the content as URLs and hashtags confuse the language recognizer.
let originalBlocks = self.blocks(keypair).blocks
let originalOnlyText = originalBlocks.compactMap {
if case .text(let txt) = $0 {
return txt
}
else {
return nil
}
}
.joined(separator: " ")
// Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
let languageRecognizer = NLLanguageRecognizer()
languageRecognizer.processString(originalOnlyText)
guard let locale = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue else {
let nstr: String? = nil
return nstr
guard let languageHypothesis = self.blocks(keypair).languageHypothesis else {
return nil
}
// Remove the variant component and just take the language part as translation services typically only supports the variant-less language.
// Moreover, speakers of one variant can generally understand other variants.
return localeToLanguage(locale)
return localeToLanguage(languageHypothesis.rawValue)
}
var age: TimeInterval {

View File

@@ -281,5 +281,19 @@ final class NdbTests: XCTestCase {
}
/// An English note fixture containing a right single quotation mark must yield the language code "en".
func test_note_language_english_with_right_single_quotation_mark() async throws {
    let note = try XCTUnwrap(NdbNote.owned_from_json(json: test_english_text_note_with_right_single_quotation_mark))

    // `note_language` asserts that it is not called on the main thread, so evaluate it in a detached task.
    // The result is awaited so the assertion is guaranteed to run before the test finishes;
    // an un-awaited `Task` would let the test return (and pass) without ever checking anything.
    let language = await Task.detached { note.note_language(test_keypair) }.value

    XCTAssertEqual(language, "en")
}
/// A Japanese note fixture must yield the language code "ja".
func test_note_language_non_english() async throws {
    let note = try XCTUnwrap(NdbNote.owned_from_json(json: test_japanese_text_note))

    // `note_language` asserts that it is not called on the main thread, so evaluate it in a detached task.
    // The result is awaited so the assertion is guaranteed to run before the test finishes;
    // an un-awaited `Task` would let the test return (and pass) without ever checking anything.
    let language = await Task.detached { note.note_language(test_keypair) }.value

    XCTAssertEqual(language, "ja")
}
}