360 lines
9.7 KiB
Swift
360 lines
9.7 KiB
Swift
//
|
|
// File.swift
|
|
//
|
|
//
|
|
// Created by Niklas Amslgruber on 10.06.23.
|
|
//
|
|
|
|
import Foundation
|
|
import ArgumentParser
|
|
import EmojiKit
|
|
|
|
#if os(macOS)
|
|
/// The `download` subcommand.
///
/// Downloads the emoji list and the emoji count chart for the selected `version` as well as the
/// CLDR annotation files for every entry in `supportedLocales`, parses them with `UnicodeParser`
/// and writes the resulting categories as a pretty-printed JSON file.
struct EmojiDownloader: ParsableCommand, AsyncParsableCommand {

    static let configuration: CommandConfiguration = CommandConfiguration(
        commandName: "download",
        abstract: "Downloads a list of all available emojis and their counts from unicode.org for the respective unicode version"
    )

    /// Output directory for the generated JSON file. Only used when `DEBUG` is not set, see `getPath()`.
    @Argument var path: String

    /// Emoji version to download. Defaults to `.v15`.
    @Option(name: .shortAndLong) var version: EmojiManager.Version = .v15

    // MARK: - Output location

    /// Returns the file-system path of the directory the JSON file is written to.
    ///
    /// With `DEBUG` set, the `path` argument is ignored: the directory is `EmojiKit/Resources`
    /// resolved against the parent of the directory that contains this source file.
    /// Otherwise the `path` argument is returned unchanged.
    private func getPath() -> String {
        #if DEBUG
        // `#filePath` always expands to the full path of this source file, independent of the
        // language mode (`#file` does not in Swift 6 mode).
        let resourcesURL = URL(filePath: #filePath)
            .deletingLastPathComponent()
            .deletingLastPathComponent()
            .appending(path: "EmojiKit/Resources")

        // Return a plain path rather than `absoluteString`: the latter is prefixed with the
        // "file://" scheme, and `save` passes this value to `URL(filePath:)`.
        return resourcesURL.path(percentEncoded: false)
        #else
        return path
        #endif
    }

    // MARK: - Entry point

    /// Runs the download, parse and save pipeline.
    ///
    /// Failures of the individual steps are reported on standard output and end the run early.
    /// - Throws: Errors raised while opening or reading one of the downloaded CLDR annotation files.
    func run() async throws {
        print("⚙️", "Starting to download all emojis for version \(version.rawValue) from unicode.org...\n")

        guard let emojiListURL = await getTemporaryURLForEmojiList(version: version) else {
            print("⚠️", "Could not get content from unicode.org. The emoji list is not available.\n")
            return
        }

        guard let emojiCountsURL = await getTemporaryURLForEmojiCounts(version: version) else {
            print("⚠️", "Could not get content from unicode.org. The emoji count file is not available.\n")
            return
        }

        print("🎉", "Successfully retrieved temporary URLs for version \(version.rawValue).\n")

        print("⚙️", "Starting to parse content...\n")

        var allCLDRAnnotations = [String: Emoji]()
        for locale in supportedLocales {
            guard let cldrAnnotationsURL = await getURLForCLDRAnnotations(locale: locale) else {
                // Previously this path ended the run without any output.
                print("⚠️", "Could not download CLDR annotations for locale \(locale).\n")
                return
            }

            guard let cldrAnnotationsDerivedURL = await getURLForCLDRAnnotationsDerived(locale: locale) else {
                print("⚠️", "Could not download derived CLDR annotations for locale \(locale).\n")
                return
            }

            guard let cldrAnnotationsMap = try readAnnotations(at: cldrAnnotationsURL, locale: locale) else {
                print("⚠️", "Could not read CLDR annotations data.\n")
                return
            }

            guard let cldrAnnotationsDerivedMap = try readAnnotations(at: cldrAnnotationsDerivedURL, locale: locale) else {
                print("⚠️", "Could not read CLDR annotations derived data.\n")
                return
            }

            mergeAnnotations(cldrAnnotationsMap, into: &allCLDRAnnotations)
            mergeAnnotations(cldrAnnotationsDerivedMap, into: &allCLDRAnnotations)
        }

        let parser = UnicodeParser()
        let emojisByCategory: [UnicodeEmojiCategory]

        do {
            emojisByCategory = try await parser.parseEmojiList(for: emojiListURL, emojisMap: allCLDRAnnotations)

            let emojiCounts: [UnicodeEmojiCategory.Name: Int] = parser.parseCountHTML(for: emojiCountsURL)

            for category in emojisByCategory {
                // NOTE: `assert` is only evaluated in debug builds; optimized builds skip this check.
                assert(
                    emojiCounts[category.name] == category.emojis.count,
                    "Emoji count mismatch for category \(category.name)"
                )
            }

            print("🎉", "Successfully parsed emojis and matched counts to the count file.\n")
        } catch {
            print("⚠️", "Could not parse emoji lists or emoji counts. Process failed with: \(error).\n")
            return
        }

        do {
            try save(data: emojisByCategory, for: version)
            // Only reported when the file was actually written.
            print("🎉", "Successfully saved emojis to file.\n")
        } catch {
            print("⚠️", "Could not save emojis to file. Process failed with: \(error).\n")
        }
    }

    // MARK: - CLDR annotations

    /// Reads the downloaded annotation file at `url` and parses it for `locale`.
    ///
    /// - Returns: The parsed map (empty when the XML could not be parsed), or `nil` when the file
    ///   yielded no data.
    /// - Throws: Errors raised while opening or reading the file.
    private func readAnnotations(at url: URL, locale: String) throws -> [String: Emoji]? {
        print("Trying CLDR data at \(url)\n")

        let handle = try FileHandle(forReadingFrom: url)
        // Release the file descriptor on every exit path.
        defer { try? handle.close() }

        guard let data = try handle.readToEnd() else {
            return nil
        }
        return emojisMap(data: data, locale: locale) ?? [:]
    }

    /// Merges `annotations` into `target`. For an emoji present in both, the existing value is kept
    /// and the localized keywords are combined, with existing keyword entries taking precedence.
    private func mergeAnnotations(_ annotations: [String: Emoji], into target: inout [String: Emoji]) {
        target.merge(annotations) { existing, incoming in
            let combinedKeywords = existing.localizedKeywords.merging(incoming.localizedKeywords) { kept, _ in kept }
            return Emoji(value: existing.value, localizedKeywords: combinedKeywords)
        }
    }

    /// Parses CLDR annotation XML with `CLDRAnnotationsXMLHandler`.
    ///
    /// - Parameters:
    ///   - data: Raw XML document.
    ///   - locale: Locale identifier passed on to the handler.
    /// - Returns: The handler's `emojisMap`, or `nil` when `XMLParser` reports a parse failure.
    func emojisMap(data: Data, locale: String) -> [String: Emoji]? {
        let handler = CLDRAnnotationsXMLHandler(locale: locale)
        let parser = XMLParser(data: data)
        parser.delegate = handler

        guard parser.parse() else {
            print("Failed to parse XML\n")
            return nil
        }
        return handler.emojisMap
    }

    /// Locale identifiers for which CLDR annotation files are downloaded.
    let supportedLocales = [
        "af", "am", "ar", "ar_SA", "as", "ast", "az",
        "be", "bew", "bg", "bgn", "bn", "br", "bs",
        "ca", "ccp", "ceb", "chr", "ckb", "cs", "cv", "cy",
        "da", "de", "de_CH", "doi", "dsb",
        "el", "en", "en_001", "en_AU", "en_CA", "en_GB", "en_IN",
        "es", "es_419", "es_MX", "es_US", "et", "eu",
        "fa", "ff", "ff_Adlm", "fi", "fil", "fo", "fr", "fr_CA",
        "ga", "gd", "gl", "gu",
        "ha", "ha_NE", "he", "hi", "hi_Latn", "hr", "hsb", "hu", "hy",
        "ia", "id", "ig", "is", "it",
        "ja", "jv",
        "ka", "kab", "kk", "kl", "km", "kn", "ko", "kok", "ku", "ky",
        "lb", "lij", "lo", "lt", "lv",
        "mai", "mi", "mk", "ml", "mn", "mni", "mr", "ms", "mt", "my",
        "nb", "ne", "nl", "nn", "no", "nso",
        "oc", "om", "or",
        "pa", "pa_Arab", "pcm", "pl", "ps", "pt", "pt_PT",
        "qu", "quc",
        "rhg", "rm", "ro", "root", "ru", "rw",
        "sa", "sat", "sc", "sd", "si", "sk", "sl", "so", "sq",
        "sr", "sr_Cyrl", "sr_Cyrl_BA", "sr_Latn", "sr_Latn_BA",
        "su", "sv", "sw", "sw_KE",
        "ta", "te", "tg", "th", "ti", "tk", "tn", "to", "tr", "tt",
        "ug", "uk", "ur", "uz",
        "vi",
        "wo",
        "xh",
        "yo", "yo_BJ", "yue", "yue_Hans",
        "zh", "zh_Hant", "zh_Hant_HK", "zu"
    ]

    // MARK: - Downloads

    /// Downloads the CLDR `annotations` XML for `locale` and returns the location of the downloaded file.
    func getURLForCLDRAnnotations(locale: String) async -> URL? {
        await load(urlString: "https://raw.githubusercontent.com/unicode-org/cldr/main/common/annotations/\(locale).xml")
    }

    /// Downloads the CLDR `annotationsDerived` XML for `locale` and returns the location of the downloaded file.
    func getURLForCLDRAnnotationsDerived(locale: String) async -> URL? {
        await load(urlString: "https://raw.githubusercontent.com/unicode-org/cldr/main/common/annotationsDerived/\(locale).xml")
    }

    /// Downloads `emoji-test.txt` for `version` and returns the location of the downloaded file.
    ///
    /// `.v15` is fetched from a pinned commit of the CLDR repository; every other version is
    /// fetched from unicode.org.
    func getTemporaryURLForEmojiList(version: EmojiManager.Version) async -> URL? {
        let urlString: String
        if version == .v15 {
            urlString = "https://raw.githubusercontent.com/unicode-org/cldr/ed4f82917078fb71f093977a973b30a6151fa28b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/emoji/emoji-test.txt"
        } else {
            urlString = "https://unicode.org/Public/emoji/\(version.versionIdentifier)/emoji-test.txt"
        }
        return await load(urlString: urlString)
    }

    /// Downloads the emoji counts chart (HTML) for `version` and returns the location of the downloaded file.
    func getTemporaryURLForEmojiCounts(version: EmojiManager.Version) async -> URL? {
        await load(urlString: "https://www.unicode.org/emoji/charts-\(version.versionIdentifier)/emoji-counts.html")
    }

    /// Downloads the resource at `urlString` into a temporary file.
    ///
    /// - Returns: The location of the downloaded file, or `nil` when the string is not a valid URL,
    ///   the request fails, or the response is not an HTTP 200.
    private func load(urlString: String) async -> URL? {
        guard let url = URL(string: urlString) else {
            return nil
        }

        do {
            // Use the shared session: this method runs hundreds of times per invocation and a
            // freshly created session per call would never be invalidated.
            let (tmpFileURL, response) = try await URLSession.shared.download(from: url)

            guard (response as? HTTPURLResponse)?.statusCode == 200 else {
                print("⚠️", "Failed with a non 200 HTTP status")
                return nil
            }
            return tmpFileURL
        } catch {
            print("⚠️", error)
            return nil
        }
    }

    // MARK: - Persistence

    /// Encodes `data` as pretty-printed JSON and writes it to `<getPath()>/<version.fileName>.json`.
    ///
    /// - Parameters:
    ///   - data: Emoji categories to persist.
    ///   - version: Version whose `fileName` determines the name of the output file.
    /// - Throws: Encoding errors from `JSONEncoder` and file-system errors from the write.
    private func save(data: [UnicodeEmojiCategory], for version: EmojiManager.Version) throws {
        let encoder = JSONEncoder()
        encoder.outputFormatting = .prettyPrinted
        let encoded = try encoder.encode(data)

        var fileURL = URL(filePath: getPath())
        fileURL.append(path: "\(version.fileName).json")

        print("⚙️", "Saving emojis to file \(fileURL.absoluteString)...\n")

        // An atomic write creates the file when it is missing, so no separate existence check or
        // `createFile` call is needed. `JSONEncoder` already produces UTF-8 data.
        try encoded.write(to: fileURL, options: .atomic)
    }
}
|
|
#endif
|
|
// Declares the conformance that lets `EmojiManager.Version` be used as the value type of
// ArgumentParser arguments and options (e.g. `@Option var version`). The body is empty, so the
// protocol requirements must already be satisfied elsewhere — presumably through ArgumentParser's
// default implementation for raw-representable types; confirm against `EmojiManager.Version`.
extension EmojiManager.Version: ExpressibleByArgument {
}
|