Add support for emoji keywords using CLDR annotation data
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import OrderedCollections
|
||||
|
||||
public class AppleEmojiCategory: Codable {
|
||||
|
||||
@@ -46,11 +47,11 @@ public class AppleEmojiCategory: Codable {
|
||||
}
|
||||
|
||||
public let name: Name
|
||||
public var values: [String]
|
||||
public var emojis: OrderedDictionary<String, Emoji>
|
||||
|
||||
public init(name: Name, values: [String]) {
|
||||
public init(name: Name, emojis: OrderedDictionary<String, Emoji>) {
|
||||
self.name = name
|
||||
self.values = values
|
||||
self.emojis = emojis
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
//
|
||||
// Emoji.swift
|
||||
//
|
||||
//
|
||||
// Created by Terry Yiu on 6/2/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// An emoji string paired with a list of keywords.
public struct Emoji: Codable, Hashable {
    /// The emoji itself, stored as a string.
    public let value: String

    /// Keywords attached to this emoji; may be empty.
    public let keywords: [String]

    /// Creates an emoji from its string value and its keywords.
    ///
    /// - Parameters:
    ///   - value: The emoji string.
    ///   - keywords: Keywords to associate with `value`.
    public init(value: String, keywords: [String]) {
        self.keywords = keywords
        self.value = value
    }
}
|
||||
@@ -37,7 +37,7 @@ public enum EmojiManager {
|
||||
public static func getSupportedVersion() -> Version {
|
||||
if #available(iOS 17.4, *) {
|
||||
return .v15_1
|
||||
} else if #available(iOS 16.4, *) {
|
||||
} else if #available(iOS 16.4, *) {
|
||||
return .v15
|
||||
} else if #available(iOS 15.4, *) {
|
||||
return .v14
|
||||
@@ -59,25 +59,25 @@ public enum EmojiManager {
|
||||
var filteredEmojis: [UnicodeEmojiCategory] = []
|
||||
var appleCategories: [AppleEmojiCategory] = []
|
||||
for category in result {
|
||||
let supportedEmojis = category.values.filter({
|
||||
showAllVariations ? true : isNeutralEmoji(for: $0)
|
||||
let supportedEmojis = category.emojis.filter({
|
||||
showAllVariations ? true : isNeutralEmoji(for: $0.key)
|
||||
})
|
||||
let unicodeCategory = UnicodeEmojiCategory(name: category.name, values: supportedEmojis)
|
||||
let unicodeCategory = UnicodeEmojiCategory(name: category.name, emojis: supportedEmojis)
|
||||
filteredEmojis.append(unicodeCategory)
|
||||
|
||||
if shouldMergeCategory(category), let index = appleCategories.firstIndex(where: { $0.name == .smileysAndPeople }) {
|
||||
if category.name == .smileysAndEmotions {
|
||||
let oldValues = appleCategories[index].values
|
||||
appleCategories[index].values = supportedEmojis
|
||||
appleCategories[index].values.append(contentsOf: oldValues)
|
||||
let oldValues = appleCategories[index].emojis
|
||||
appleCategories[index].emojis = supportedEmojis
|
||||
appleCategories[index].emojis.merge(oldValues) { (current, _) in current }
|
||||
} else {
|
||||
appleCategories[index].values.append(contentsOf: supportedEmojis)
|
||||
appleCategories[index].emojis.merge(supportedEmojis) { (current, _) in current }
|
||||
}
|
||||
} else {
|
||||
guard let appleCategory = unicodeCategory.appleCategory else {
|
||||
continue
|
||||
}
|
||||
appleCategories.append(AppleEmojiCategory(name: appleCategory, values: supportedEmojis))
|
||||
appleCategories.append(AppleEmojiCategory(name: appleCategory, emojis: supportedEmojis))
|
||||
}
|
||||
}
|
||||
return appleCategories.sorted(by: { $0.name.order < $1.name.order })
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -6,6 +6,7 @@
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import OrderedCollections
|
||||
|
||||
public class UnicodeEmojiCategory: Codable {
|
||||
|
||||
@@ -49,11 +50,11 @@ public class UnicodeEmojiCategory: Codable {
|
||||
|
||||
public let name: Name
|
||||
public let appleCategory: AppleEmojiCategory.Name?
|
||||
public var values: [String]
|
||||
public var emojis: OrderedDictionary<String, Emoji>
|
||||
|
||||
public init(name: Name, values: [String]) {
|
||||
public init(name: Name, emojis: OrderedDictionary<String, Emoji>) {
|
||||
self.name = name
|
||||
self.appleCategory = name.appleName
|
||||
self.values = values
|
||||
self.emojis = emojis
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
//
|
||||
// CLDRAnnotationsXMLHandler.swift
|
||||
//
|
||||
//
|
||||
// Created by Terry Yiu on 6/2/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import EmojiKit
|
||||
|
||||
/// `XMLParserDelegate` that collects one `Emoji` per `<annotation>` element that
/// carries a `cp` attribute and whose `type` attribute is not `"tts"`.
///
/// The element's text is split on `|`; each trimmed, non-empty piece becomes a keyword.
class CLDRAnnotationsXMLHandler: NSObject, XMLParserDelegate {
    /// Name of the element currently being parsed ("" between elements).
    var currentElement = ""
    /// Emoji being built for the current `<annotation>`; `nil` when the element is skipped.
    var currentEmoji: Emoji?
    /// Every emoji collected so far, in document order.
    var emojis = [Emoji]()
    /// Raw (untrimmed) character data accumulated for the current element.
    var currentEmojiValue = ""

    /// Starts a new emoji when a non-"tts" `<annotation>` with a `cp` attribute opens.
    func parser(_ parser: XMLParser, didStartElement elementName: String, namespaceURI: String?, qualifiedName qName: String?, attributes attributeDict: [String : String] = [:]) {
        if elementName == "annotation", attributeDict["type"] != "tts", let cp = attributeDict["cp"] {
            currentEmoji = Emoji(value: cp, keywords: [])
            currentEmojiValue = ""
        }
        currentElement = elementName
    }

    /// Accumulates character data for the current `<annotation>` element.
    func parser(_ parser: XMLParser, foundCharacters string: String) {
        if currentElement == "annotation" {
            // The parser may report an element's text in several chunks. Trimming each
            // chunk would drop whitespace that falls on a chunk boundary and glue words
            // together, so the raw text is kept and trimmed per keyword at element end.
            currentEmojiValue += string
        }
    }

    /// Finishes the current emoji (if any) and resets the per-element state.
    func parser(_ parser: XMLParser, didEndElement elementName: String, namespaceURI: String?, qualifiedName qName: String?) {
        if elementName == "annotation", let emoji = currentEmoji {
            let keywords = currentEmojiValue
                .split(separator: "|")
                .map { $0.trim() }
                // Whitespace-only pieces trim down to "" and carry no keyword.
                .filter { !$0.isEmpty }
            emojis.append(Emoji(value: emoji.value, keywords: keywords))
        }
        currentElement = ""
        currentEmojiValue = ""
        currentEmoji = nil
    }

    /// Logs parse errors to standard output.
    func parser(_ parser: XMLParser, parseErrorOccurred parseError: Error) {
        print("Parse error: \(parseError.localizedDescription)\n")
    }

    /// The collected emojis keyed by their `value`.
    /// When several entries share a value, the last one collected wins.
    var emojisMap: [String: Emoji] {
        emojis.reduce(into: [String: Emoji]()) { $0[$1.value] = $1 }
    }
}
|
||||
|
||||
extension Character {
    /// `true` when at least one Unicode scalar of this character has the `isEmoji` property.
    var isEmoji: Bool {
        for scalar in unicodeScalars where scalar.properties.isEmoji {
            return true
        }
        return false
    }
}
|
||||
@@ -24,7 +24,7 @@ struct EmojiDownloader: ParsableCommand, AsyncParsableCommand {
|
||||
#if DEBUG
|
||||
var url = URL(filePath: #file)
|
||||
url = url.deletingLastPathComponent().deletingLastPathComponent()
|
||||
url.append(path: "EmojiKitLibrary/Resources")
|
||||
url.append(path: "EmojiKit/Resources")
|
||||
|
||||
return url.absoluteString
|
||||
#else
|
||||
@@ -35,8 +35,21 @@ struct EmojiDownloader: ParsableCommand, AsyncParsableCommand {
|
||||
func run() async throws {
|
||||
print("⚙️", "Starting to download all emojis for version \(version.rawValue) from unicode.org...\n")
|
||||
|
||||
guard let emojiListURL = await getTemporaryURLForEmojiList(version: version), let emojiCountsURL = await getTemporaryURLForEmojiCounts(version: version) else {
|
||||
print("⚠️", "Could not get content from unicode.org. Either the emoji list or the emoji count file is not available.\n")
|
||||
guard let emojiListURL = await getTemporaryURLForEmojiList(version: version) else {
|
||||
print("⚠️", "Could not get content from unicode.org. The emoji list is not available.\n")
|
||||
return
|
||||
}
|
||||
|
||||
guard let emojiCountsURL = await getTemporaryURLForEmojiCounts(version: version) else {
|
||||
print("⚠️", "Could not get content from unicode.org. The emoji count file is not available.\n")
|
||||
return
|
||||
}
|
||||
|
||||
guard let cldrAnnotationsURL = await getURLForCLDRAnnotations() else {
|
||||
return
|
||||
}
|
||||
|
||||
guard let cldrAnnotationsDerivedURL = await getURLForCLDRAnnotationsDerived() else {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -44,15 +57,36 @@ struct EmojiDownloader: ParsableCommand, AsyncParsableCommand {
|
||||
|
||||
print("⚙️", "Starting to parse content...\n")
|
||||
|
||||
print("Trying CLDR data at \(cldrAnnotationsURL)\n")
|
||||
|
||||
let cldrAnnotationsHandle = try FileHandle(forReadingFrom: cldrAnnotationsURL)
|
||||
guard let cldrAnnotationsData = try cldrAnnotationsHandle.readToEnd() else {
|
||||
print("⚠️", "Could not read CLDR annotations data.\n")
|
||||
return
|
||||
}
|
||||
|
||||
let cldrAnnotationsMap = emojisMap(data: cldrAnnotationsData) ?? [:]
|
||||
|
||||
print("Trying CLDR data at \(cldrAnnotationsDerivedURL)\n")
|
||||
|
||||
let cldrAnnotationsDerivedHandle = try FileHandle(forReadingFrom: cldrAnnotationsDerivedURL)
|
||||
guard let cldrAnnotationsDerivedData = try cldrAnnotationsDerivedHandle.readToEnd() else {
|
||||
print("⚠️", "Could not read CLDR annotations derived data.\n")
|
||||
return
|
||||
}
|
||||
let cldrAnnotationsDerivedMap = emojisMap(data: cldrAnnotationsDerivedData) ?? [:]
|
||||
|
||||
let allCLDRAnnotationsMap = cldrAnnotationsMap.merging(cldrAnnotationsDerivedMap) { (current, _) in current }
|
||||
|
||||
let parser = UnicodeParser()
|
||||
|
||||
do {
|
||||
let emojisByCategory: [UnicodeEmojiCategory] = try await parser.parseEmojiList(for: emojiListURL)
|
||||
let emojisByCategory: [UnicodeEmojiCategory] = try await parser.parseEmojiList(for: emojiListURL, emojisMap: allCLDRAnnotationsMap)
|
||||
|
||||
let emojiCounts: [UnicodeEmojiCategory.Name: Int] = parser.parseCountHTML(for: emojiCountsURL)
|
||||
|
||||
for category in emojisByCategory {
|
||||
assert(emojiCounts[category.name] == category.values.count)
|
||||
assert(emojiCounts[category.name] == category.emojis.count)
|
||||
}
|
||||
|
||||
print("🎉", "Successfully parsed emojis and matched counts to the count file.\n")
|
||||
@@ -66,6 +100,28 @@ struct EmojiDownloader: ParsableCommand, AsyncParsableCommand {
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses CLDR annotation XML and returns the emojis found, keyed by emoji value.
///
/// - Parameter data: The XML document to parse.
/// - Returns: The handler's emoji map, or `nil` when `XMLParser.parse()` reports failure.
func emojisMap(data: Data) -> [String: Emoji]? {
    print("emojisMap\n")

    let annotationsHandler = CLDRAnnotationsXMLHandler()
    let xmlParser = XMLParser(data: data)
    xmlParser.delegate = annotationsHandler

    guard xmlParser.parse() else {
        print("Failed to parse XML\n")
        return nil
    }
    return annotationsHandler.emojisMap
}
|
||||
|
||||
/// Loads the English CLDR annotations file (`common/annotations/en.xml`) from the
/// unicode-org/cldr repository at a fixed commit.
///
/// - Returns: Whatever `load(urlString:)` yields for that address; `nil` on failure.
func getURLForCLDRAnnotations() async -> URL? {
    let annotationsAddress = "https://raw.githubusercontent.com/unicode-org/cldr/c1dc8c7ef6584668345cf741e51b1722d8114bc8/common/annotations/en.xml"
    return await load(urlString: annotationsAddress)
}
|
||||
|
||||
/// Loads the English derived CLDR annotations file (`common/annotationsDerived/en.xml`)
/// from the unicode-org/cldr repository at a fixed commit.
///
/// - Returns: Whatever `load(urlString:)` yields for that address; `nil` on failure.
func getURLForCLDRAnnotationsDerived() async -> URL? {
    let derivedAnnotationsAddress = "https://raw.githubusercontent.com/unicode-org/cldr/c1dc8c7ef6584668345cf741e51b1722d8114bc8/common/annotationsDerived/en.xml"
    return await load(urlString: derivedAnnotationsAddress)
}
|
||||
|
||||
/// Loads `emoji-test.txt` for the given emoji version from unicode.org.
///
/// - Parameter version: The emoji version whose `versionIdentifier` selects the directory.
/// - Returns: Whatever `load(urlString:)` yields for that address; `nil` on failure.
func getTemporaryURLForEmojiList(version: EmojiManager.Version) async -> URL? {
    let emojiListAddress = "https://unicode.org/Public/emoji/\(version.versionIdentifier)/emoji-test.txt"
    return await load(urlString: emojiListAddress)
}
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
import Foundation
|
||||
import SwiftSoup
|
||||
import EmojiKit
|
||||
import OrderedCollections
|
||||
|
||||
class UnicodeParser {
|
||||
|
||||
@@ -18,10 +19,29 @@ class UnicodeParser {
|
||||
case minimallyQualified = "minimally-qualified"
|
||||
}
|
||||
|
||||
func parseEmojiList(for fileUrl: URL) async throws -> [UnicodeEmojiCategory] {
|
||||
/// Downloads the XML document at `fileUrl` and hands it to `parseXML(data:)`.
///
/// The previous implementation created a `URLSession` data task but never resumed it,
/// so nothing was ever downloaded or parsed. The request is now awaited directly.
///
/// - Parameter fileUrl: Location of the XML document to download.
/// - Throws: The error reported by `URLSession` when the download fails.
func parseEmojis(for fileUrl: URL) async throws {
    do {
        let (data, _) = try await URLSession.shared.data(from: fileUrl)

        // Parse the downloaded XML data
        parseXML(data: data)
    } catch {
        // Keep the diagnostic line, then let the caller decide how to react.
        print("Failed to download data: \(error.localizedDescription)")
        throw error
    }
}
|
||||
|
||||
/// Runs an `XMLParser` over `data` with a `CLDRAnnotationsXMLHandler` as delegate.
///
/// The parse outcome and the handler's collected emojis are not returned to the caller.
///
/// - Parameter data: The XML document to parse.
func parseXML(data: Data) {
    let annotationsHandler = CLDRAnnotationsXMLHandler()
    let xmlParser = XMLParser(data: data)
    xmlParser.delegate = annotationsHandler
    _ = xmlParser.parse()
}
|
||||
|
||||
func parseEmojiList(for fileUrl: URL, emojisMap: [String: Emoji]) async throws -> [UnicodeEmojiCategory] {
|
||||
let handle = try FileHandle(forReadingFrom: fileUrl)
|
||||
var currentGroup: UnicodeEmojiCategory.Name = .activities
|
||||
var emojisByGroup: [UnicodeEmojiCategory.Name: [String]] = [:]
|
||||
var emojisByGroup: [UnicodeEmojiCategory.Name: OrderedDictionary<String, Emoji>] = [:]
|
||||
|
||||
for try await line in handle.bytes.lines {
|
||||
|
||||
@@ -38,7 +58,7 @@ class UnicodeParser {
|
||||
continue
|
||||
}
|
||||
currentGroup = category
|
||||
emojisByGroup[category] = []
|
||||
emojisByGroup[category] = [:]
|
||||
}
|
||||
|
||||
/// Split line into list of entries
|
||||
@@ -67,13 +87,24 @@ class UnicodeParser {
|
||||
if hexComponents.count > 1 {
|
||||
let multiHexEmoji = hexComponents.compactMap({ $0.asEmoji() }).joined()
|
||||
|
||||
|
||||
if multiHexEmoji.isEmpty == false {
|
||||
emojisByGroup[currentGroup]?.append(multiHexEmoji)
|
||||
if let mapLookup = emojisMap[makeEmojiUnqualified(emoji: multiHexEmoji)] {
|
||||
if mapLookup.keywords.isEmpty == true {
|
||||
print("Could not find keywords in emojis map for multiHex: \(multiHexEmoji)\n")
|
||||
}
|
||||
emojisByGroup[currentGroup]?[multiHexEmoji] = Emoji(value: multiHexEmoji, keywords: mapLookup.keywords)
|
||||
} else {
|
||||
print("Could not find in emojis map at all for multiHex: \(multiHexEmoji)\n")
|
||||
emojisByGroup[currentGroup]?[multiHexEmoji] = Emoji(value: multiHexEmoji, keywords: [])
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if let unicode = hexString.asEmoji(), unicode.isEmpty == false {
|
||||
emojisByGroup[currentGroup]?.append(unicode)
|
||||
if let mapLookup = emojisMap[makeEmojiUnqualified(emoji: unicode)] {
|
||||
emojisByGroup[currentGroup]?[unicode] = Emoji(value: unicode, keywords: mapLookup.keywords)
|
||||
} else {
|
||||
emojisByGroup[currentGroup]?[unicode] = Emoji(value: unicode, keywords: [])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -82,11 +113,25 @@ class UnicodeParser {
|
||||
var result: [UnicodeEmojiCategory] = []
|
||||
|
||||
for category in UnicodeEmojiCategory.Name.allCases {
|
||||
result.append(UnicodeEmojiCategory(name: category, values: emojisByGroup[category] ?? []))
|
||||
result.append(UnicodeEmojiCategory(name: category, emojis: emojisByGroup[category] ?? OrderedDictionary<String, Emoji>()))
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
/// Returns `emoji` with every U+FE0F (variation selector-16) scalar removed.
///
/// - Parameter emoji: The emoji string to strip.
/// - Returns: A string made of the remaining Unicode scalars, in their original order.
func makeEmojiUnqualified(emoji: String) -> String {
    let variationSelector: Unicode.Scalar = "\u{FE0F}"
    let keptScalars = emoji.unicodeScalars.filter { $0 != variationSelector }
    return String(String.UnicodeScalarView(keptScalars))
}
|
||||
|
||||
func parseCountHTML(for url: URL) -> [UnicodeEmojiCategory.Name: Int] {
|
||||
do {
|
||||
let html = try String(contentsOf: url)
|
||||
|
||||
Reference in New Issue
Block a user