From f69e0c660aadda02f69398a8423b2c9466d4f1c1 Mon Sep 17 00:00:00 2001 From: Terry Yiu <963907+tyiu@users.noreply.github.com> Date: Sun, 12 Feb 2023 12:38:42 -0500 Subject: [PATCH] Fix language detection to look at only text and not URLs or hashtags Changelog-Fixed: Improve language detection Closes: #577 --- damus/Components/TranslateView.swift | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/damus/Components/TranslateView.swift b/damus/Components/TranslateView.swift index e6c269fb..58d12085 100644 --- a/damus/Components/TranslateView.swift +++ b/damus/Components/TranslateView.swift @@ -83,9 +83,15 @@ struct TranslateView: View { currentLanguage = Locale.current.languageCode ?? "en" } - // Rely on Apple's NLLanguageRecognizer to tell us which language it thinks the note is in. - let content = event.get_content(damus_state.keypair.privkey) - noteLanguage = NLLanguageRecognizer.dominantLanguage(for: content)?.rawValue ?? currentLanguage + // Rely on Apple's NLLanguageRecognizer to tell us which language it thinks the note is in + // and filter on only the text portions of the content as URLs and hashtags confuse the language recognizer. + let originalBlocks = event.blocks(damus_state.keypair.privkey) + let originalOnlyText = originalBlocks.compactMap { $0.is_text }.joined(separator: " ") + + // Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate. + let languageRecognizer = NLLanguageRecognizer() + languageRecognizer.processString(originalOnlyText) + noteLanguage = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue ?? currentLanguage if let lang = noteLanguage, noteLanguage != currentLanguage { // If the detected dominant language is a variant, remove the variant component and just take the language part as translation services typically only supports the variant-less language. @@ -107,7 +113,14 @@ struct TranslateView: View { do { // If the note language is different from our language, send a translation request. let translator = Translator(damus_state.settings) - translated_note = try await translator.translate(content, from: note_lang, to: currentLanguage) + let originalContent = event.get_content(damus_state.keypair.privkey) + translated_note = try await translator.translate(originalContent, from: note_lang, to: currentLanguage) + + if originalContent == translated_note { + // If the translation is the same as the original, don't bother showing it. + noteLanguage = currentLanguage + translated_note = nil + } } catch { // If for whatever reason we're not able to figure out the language of the note, or translate the note, fail gracefully and do not retry. It's not the end of the world. Don't want to take down someone's translation server with an accidental denial of service attack. noteLanguage = currentLanguage @@ -117,8 +130,8 @@ struct TranslateView: View { if let translated = translated_note { // Render translated note. - let blocks = event.get_blocks(content: translated) - translated_artifacts = render_blocks(blocks: blocks, profiles: damus_state.profiles, privkey: damus_state.keypair.privkey) + let translatedBlocks = event.get_blocks(content: translated) + translated_artifacts = render_blocks(blocks: translatedBlocks, profiles: damus_state.profiles, privkey: damus_state.keypair.privkey) } checkingTranslationStatus = false