//
//  File.swift
//
//
//  Created by Niklas Amslgruber on 10.06.23.
//

import Foundation
import ArgumentParser
import EmojiKit

#if os(macOS)
/// `download` subcommand: fetches the emoji list and emoji counts for a Unicode
/// version, enriches the emojis with CLDR annotation keywords for every entry in
/// `supportedLocales`, and writes the parsed categories to a JSON file.
struct EmojiDownloader: ParsableCommand, AsyncParsableCommand {

    static let configuration: CommandConfiguration = CommandConfiguration(
        commandName: "download",
        abstract: "Downloads a list of all available emojis and their counts from unicode.org for the respective unicode version"
    )

    /// Output directory supplied on the command line (only used in non-DEBUG builds, see `getPath()`).
    @Argument var path: String

    /// Unicode emoji version to download.
    @Option(name: .shortAndLong) var version: EmojiManager.Version = .v15

    /// Returns the directory (as a plain file-system path) the JSON file is written to.
    ///
    /// In DEBUG builds the user-supplied `path` is ignored: the directory is derived
    /// from this source file's location by going up two path components and
    /// appending `EmojiKit/Resources`. In all other builds `path` is returned as is.
    private func getPath() -> String {
        #if DEBUG
        var url = URL(filePath: #file)
        url = url.deletingLastPathComponent().deletingLastPathComponent()
        url.append(path: "EmojiKit/Resources")
        // Must be a file-system path, not `absoluteString`: the result is fed back into
        // `URL(filePath:)`, which would treat a "file://…" string as a relative path.
        return url.path(percentEncoded: false)
        #else
        return path
        #endif
    }

    /// Downloads all required files, parses them and saves the result.
    ///
    /// The command stops early (after printing a warning) when any download or file
    /// read fails. Errors thrown while opening or reading a downloaded CLDR file
    /// propagate to the caller; parser errors are caught and printed.
    func run() async throws {
        print("⚙️", "Starting to download all emojis for version \(version.rawValue) from unicode.org...\n")

        guard let emojiListURL = await getTemporaryURLForEmojiList(version: version) else {
            print("⚠️", "Could not get content from unicode.org. The emoji list is not available.\n")
            return
        }

        guard let emojiCountsURL = await getTemporaryURLForEmojiCounts(version: version) else {
            print("⚠️", "Could not get content from unicode.org. The emoji count file is not available.\n")
            return
        }

        print("🎉", "Successfully retrieved temporary URLs for version \(version.rawValue).\n")
        print("⚙️", "Starting to parse content...\n")

        var allCLDRAnnotations = [String: Emoji]()

        for locale in supportedLocales {
            // A missing locale aborts the whole run (unchanged behavior), but now says why.
            guard let cldrAnnotationsURL = await getURLForCLDRAnnotations(locale: locale) else {
                print("⚠️", "Could not download CLDR annotations for locale \(locale).\n")
                return
            }
            guard let cldrAnnotationsDerivedURL = await getURLForCLDRAnnotationsDerived(locale: locale) else {
                print("⚠️", "Could not download derived CLDR annotations for locale \(locale).\n")
                return
            }

            print("Trying CLDR data at \(cldrAnnotationsURL)\n")
            guard let cldrAnnotationsData = try readContents(of: cldrAnnotationsURL) else {
                print("⚠️", "Could not read CLDR annotations data.\n")
                return
            }
            let cldrAnnotationsMap = emojisMap(data: cldrAnnotationsData, locale: locale) ?? [:]

            print("Trying CLDR data at \(cldrAnnotationsDerivedURL)\n")
            guard let cldrAnnotationsDerivedData = try readContents(of: cldrAnnotationsDerivedURL) else {
                print("⚠️", "Could not read CLDR annotations derived data.\n")
                return
            }
            let cldrAnnotationsDerivedMap = emojisMap(data: cldrAnnotationsDerivedData, locale: locale) ?? [:]

            mergeAnnotations(cldrAnnotationsMap, into: &allCLDRAnnotations)
            mergeAnnotations(cldrAnnotationsDerivedMap, into: &allCLDRAnnotations)
        }

        let parser = UnicodeParser()

        do {
            let emojisByCategory: [UnicodeEmojiCategory] = try await parser.parseEmojiList(for: emojiListURL, emojisMap: allCLDRAnnotations)
            let emojiCounts: [UnicodeEmojiCategory.Name: Int] = parser.parseCountHTML(for: emojiCountsURL)

            // Debug-only sanity check: `assert` is not evaluated in optimized builds.
            for category in emojisByCategory {
                assert(emojiCounts[category.name] == category.emojis.count)
            }

            print("🎉", "Successfully parsed emojis and matched counts to the count file.\n")

            // Only report success when the file was actually written.
            if save(data: emojisByCategory, for: version) {
                print("🎉", "Successfully saved emojis to file.\n")
            }
        } catch {
            print("⚠️", "Could not parse emoji lists or emoji counts. Process failed with: \(error).\n")
        }
    }

    /// Reads the complete contents of the file at `url`, closing the handle afterwards.
    ///
    /// - Returns: Whatever `FileHandle.readToEnd()` returns for the file (may be `nil`).
    /// - Throws: Errors from opening or reading the file.
    private func readContents(of url: URL) throws -> Data? {
        let handle = try FileHandle(forReadingFrom: url)
        defer { try? handle.close() }
        return try handle.readToEnd()
    }

    /// Merges `annotations` into `target`.
    ///
    /// When an emoji key already exists, its value is kept and the localized keywords
    /// of both entries are combined; for keyword keys present in both, the existing
    /// entry wins.
    private func mergeAnnotations(_ annotations: [String: Emoji], into target: inout [String: Emoji]) {
        target.merge(annotations) { current, new in
            let combinedKeywords = current.localizedKeywords.merging(new.localizedKeywords) { existing, _ in existing }
            return Emoji(value: current.value, localizedKeywords: combinedKeywords)
        }
    }

    /// Parses CLDR annotation XML into a map of emojis.
    ///
    /// - Parameters:
    ///   - data: Raw XML document.
    ///   - locale: Locale identifier handed to the `CLDRAnnotationsXMLHandler`.
    /// - Returns: The handler's `emojisMap`, or `nil` when `XMLParser` fails to parse `data`.
    func emojisMap(data: Data, locale: String) -> [String: Emoji]? {
        let parser = XMLParser(data: data)
        let handler = CLDRAnnotationsXMLHandler(locale: locale)
        parser.delegate = handler

        guard parser.parse() else {
            print("Failed to parse XML\n")
            return nil
        }
        return handler.emojisMap
    }

    /// Locale identifiers for which CLDR annotation files are downloaded in `run()`.
    let supportedLocales = [
        "af", "am", "ar", "ar_SA", "as", "ast", "az",
        "be", "bew", "bg", "bgn", "bn", "br", "bs",
        "ca", "ccp", "ceb", "chr", "ckb", "cs", "cv", "cy",
        "da", "de", "de_CH", "doi", "dsb",
        "el", "en", "en_001", "en_AU", "en_CA", "en_GB", "en_IN",
        "es", "es_419", "es_MX", "es_US", "et", "eu",
        "fa", "ff", "ff_Adlm", "fi", "fil", "fo", "fr", "fr_CA",
        "ga", "gd", "gl", "gu",
        "ha", "ha_NE", "he", "hi", "hi_Latn", "hr", "hsb", "hu", "hy",
        "ia", "id", "ig", "is", "it",
        "ja", "jv",
        "ka", "kab", "kk", "kl", "km", "kn", "ko", "kok", "ku", "ky",
        "lb", "lij", "lo", "lt", "lv",
        "mai", "mi", "mk", "ml", "mn", "mni", "mr", "ms", "mt", "my",
        "nb", "ne", "nl", "nn", "no", "nso",
        "oc", "om", "or",
        "pa", "pa_Arab", "pcm", "pl", "ps", "pt", "pt_PT",
        "qu", "quc",
        "rhg", "rm", "ro", "root", "ru", "rw",
        "sa", "sat", "sc", "sd", "si", "sk", "sl", "so", "sq",
        "sr", "sr_Cyrl", "sr_Cyrl_BA", "sr_Latn", "sr_Latn_BA", "su", "sv", "sw", "sw_KE",
        "ta", "te", "tg", "th", "ti", "tk", "tn", "to", "tr", "tt",
        "ug", "uk", "ur", "uz",
        "vi",
        "wo",
        "xh",
        "yo", "yo_BJ", "yue", "yue_Hans",
        "zh", "zh_Hant", "zh_Hant_HK", "zu"
    ]

    /// Downloads the CLDR `annotations` XML for `locale` from the `main` branch of the
    /// unicode-org/cldr GitHub repository.
    ///
    /// - Returns: Local URL of the downloaded file, or `nil` when the download failed.
    func getURLForCLDRAnnotations(locale: String) async -> URL? {
        return await load(urlString: "https://raw.githubusercontent.com/unicode-org/cldr/main/common/annotations/\(locale).xml")
    }

    /// Downloads the CLDR `annotationsDerived` XML for `locale` from the `main` branch
    /// of the unicode-org/cldr GitHub repository.
    ///
    /// - Returns: Local URL of the downloaded file, or `nil` when the download failed.
    func getURLForCLDRAnnotationsDerived(locale: String) async -> URL? {
        return await load(urlString: "https://raw.githubusercontent.com/unicode-org/cldr/main/common/annotationsDerived/\(locale).xml")
    }

    /// Downloads `emoji-test.txt` for the given version.
    ///
    /// For `.v15` the file is taken from a fixed commit of the unicode-org/cldr
    /// repository; every other version is fetched from `unicode.org/Public/emoji`.
    ///
    /// - Returns: Local URL of the downloaded file, or `nil` when the download failed.
    func getTemporaryURLForEmojiList(version: EmojiManager.Version) async -> URL? {
        if version == .v15 {
            return await load(urlString: "https://raw.githubusercontent.com/unicode-org/cldr/ed4f82917078fb71f093977a973b30a6151fa28b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/emoji/emoji-test.txt")
        } else {
            return await load(urlString: "https://unicode.org/Public/emoji/\(version.versionIdentifier)/emoji-test.txt")
        }
    }

    /// Downloads the `emoji-counts.html` chart page for the given version.
    ///
    /// - Returns: Local URL of the downloaded file, or `nil` when the download failed.
    func getTemporaryURLForEmojiCounts(version: EmojiManager.Version) async -> URL? {
        return await load(urlString: "https://www.unicode.org/emoji/charts-\(version.versionIdentifier)/emoji-counts.html")
    }

    /// Downloads the resource at `urlString` to a file.
    ///
    /// - Returns: The file URL reported by `URLSession`, or `nil` when the string is
    ///   not a valid URL, the request throws, or the response is not an HTTP 200.
    private func load(urlString: String) async -> URL? {
        guard let url = URL(string: urlString) else {
            return nil
        }

        do {
            // The shared session avoids creating (and never invalidating) a new
            // session for each of the several hundred downloads.
            let (tmpFileURL, response) = try await URLSession.shared.download(from: url)

            guard let statusCode = (response as? HTTPURLResponse)?.statusCode, statusCode == 200 else {
                print("⚠️", "Failed with a non 200 HTTP status")
                return nil
            }
            return tmpFileURL
        } catch {
            print("⚠️", error)
            return nil
        }
    }

    /// Encodes `data` as pretty-printed JSON and writes it to
    /// `<getPath()>/<version.fileName>.json`.
    ///
    /// - Parameters:
    ///   - data: Emoji categories to persist.
    ///   - version: Version whose `fileName` determines the output file name.
    /// - Returns: `true` when the file was written, `false` when encoding or writing
    ///   failed (a warning is printed in that case).
    @discardableResult
    private func save(data: [UnicodeEmojiCategory], for version: EmojiManager.Version) -> Bool {
        let directory = getPath()

        let encoder = JSONEncoder()
        encoder.outputFormatting = .prettyPrinted

        guard let result = try? encoder.encode(data) else {
            print("⚠️", "Couldn't encode emoji categories.")
            return false
        }

        var filePath = URL(filePath: directory)
        filePath.append(path: "\(version.fileName).json")

        print("⚙️", "Saving emojis to file \(filePath.absoluteString)...\n")

        do {
            // An atomic write creates or replaces the file, so no separate
            // `createFile` step is needed.
            try result.write(to: filePath, options: .atomic)
            return true
        } catch {
            print("⚠️", error)
            return false
        }
    }
}
#endif

extension EmojiManager.Version: ExpressibleByArgument {}