// // Trie.swift // // // Created by Terry Yiu on 6/9/24. // import Foundation import OrderedCollections /// Trie is a tree data structure of all the substring permutations of a collection of strings /// optimized for searching for values of type V. /// /// Each node in the tree can have child nodes. /// Each node represents a single character in substrings, /// and each of its child nodes represent the subsequent character in those substrings. /// /// A node that has no children mean that there are no substrings /// with any additional characters beyond the branch of letters leading up to that node. /// /// A node that has values mean that there are strings that end in the character represented by the node /// and contain the substring represented by the branch of letters leading up to that node. /// /// See the article on [Trie](https://en.wikipedia.org/wiki/Trie) on Wikipedia. public class Trie { private var children = OrderedDictionary() /// Separate exact matches from strict substrings so that exact matches appear first in returned results. private var exactMatchValues = OrderedSet() private var substringMatchValues = OrderedSet() private var parent: Trie? var hasChildren: Bool { return !self.children.isEmpty } var hasValues: Bool { return !self.exactMatchValues.isEmpty || !self.substringMatchValues.isEmpty } public init() { } } /// The transformation options that can be applied to the original key when inserting a value into a trie /// as additional keys that map to the value. public struct TrieInsertionOptions: OptionSet { public let rawValue: Int public init(rawValue: Int) { self.rawValue = rawValue } /// Inserts all permutations of non-prefixed substring versions of the original key. public static let includeNonPrefixedMatches = TrieInsertionOptions(rawValue: 1 << 0) /// Inserts the localized lowercase version of the original key. public static let includeCaseInsensitiveMatches = TrieInsertionOptions(rawValue: 1 << 1) /// Inserts the original key with all diactritics removed. public static let includeDiacriticsInsensitiveMatches = TrieInsertionOptions(rawValue: 1 << 2) } public extension Trie { /// Finds the branch that matches the specified key and returns the values from all of its descendant nodes. /// Note: If `key` is an empty string, all values are returned. /// - Parameters: /// - key: The key to find in the trie. /// - Returns: The values that are mapped from matches of `key`. func find(key: String) -> [V] { var currentNode = self // Find branch with matching prefix. for char in key { if let child = currentNode.children[char] { currentNode = child } else { return [] } } // Perform breadth-first search from matching branch and collect values from all descendants. var substringMatches = OrderedSet(currentNode.substringMatchValues) var queue = Array(currentNode.children.values) while !queue.isEmpty { let node = queue.removeFirst() substringMatches.formUnion(node.exactMatchValues) substringMatches.formUnion(node.substringMatchValues) queue.append(contentsOf: node.children.values) } // Prioritize exact matches to be returned first, // and then remove exact matches from the set of partial substring matches that are appended afterward. return Array(currentNode.exactMatchValues) + (substringMatches.subtracting(currentNode.exactMatchValues)) } // swiftlint:disable cyclomatic_complexity /// Inserts a value into this trie for the specified key. /// This function stores all substring endings of the key, not only the key itself. /// Runtime performance is O(n^2) and storage cost is O(n), where n is the number of characters in the key. /// Note: If `key` is an empty string, this operation is effectively a no-op. /// - Parameters: /// - key: The key to insert that maps to `value`. /// - value: The value that is mapped from `key`. /// - options: The options to apply different transformations to `key` for additional insertion. /// - Returns: The list of whole keys that were inserted that map to `value`. func insert(key originalKey: String, value: V, options: TrieInsertionOptions = []) -> [String] { let includeNonPrefixedMatches = options.contains(.includeNonPrefixedMatches) let includeCaseInsensitiveMatches = options.contains(.includeCaseInsensitiveMatches) let includeDiacriticsInsensitiveMatches = options.contains(.includeDiacriticsInsensitiveMatches) var keys = [originalKey] if includeCaseInsensitiveMatches { let localizedLowercase = originalKey.localizedLowercase if localizedLowercase != originalKey { keys.append(localizedLowercase) } } if includeDiacriticsInsensitiveMatches, let keyWithoutDiacritics = originalKey.applyingTransform(.stripDiacritics, reverse: false), keyWithoutDiacritics != originalKey { keys.append(keyWithoutDiacritics) if includeCaseInsensitiveMatches { let localizedLowercaseWithoutDiacritics = keyWithoutDiacritics.localizedLowercase if localizedLowercaseWithoutDiacritics != originalKey { keys.append(localizedLowercaseWithoutDiacritics) } } } for key in keys { // Create root branches for each character of the key to enable substring searches // instead of only just prefix searches. // Hence the nested loop. for keyIndex in 0..