Files
swift-trie/Sources/SwiftTrie/Trie.swift
2024-06-10 00:11:16 -04:00

158 lines
6.1 KiB
Swift

//
// Trie.swift
//
//
// Created by Terry Yiu on 6/9/24.
//
import Foundation
/// Trie is a tree data structure of all the substring permutations of a collection of strings
/// optimized for searching for values of type V.
///
/// Each node in the tree can have child nodes.
/// Each node represents a single character in substrings,
/// and each of its child nodes represent the subsequent character in those substrings.
///
/// A node that has no children mean that there are no substrings
/// with any additional characters beyond the branch of letters leading up to that node.
///
/// A node that has values mean that there are strings that end in the character represented by the node
/// and contain the substring represented by the branch of letters leading up to that node.
///
/// See the article on [Trie](https://en.wikipedia.org/wiki/Trie) on Wikipedia.
public class Trie<V: Hashable> {
private var children: [Character: Trie] = [:]
/// Separate exact matches from strict substrings so that exact matches appear first in returned results.
private var exactMatchValues = Set<V>()
private var substringMatchValues = Set<V>()
private var parent: Trie?
var hasChildren: Bool {
return !self.children.isEmpty
}
var hasValues: Bool {
return !self.exactMatchValues.isEmpty || !self.substringMatchValues.isEmpty
}
public init() { }
}
public extension Trie {
/// Finds the branch that matches the specified key and returns the values from all of its descendant nodes.
/// Note: If `key` is an empty string, all values are returned.
/// - Parameters:
/// - key: The key to find in the trie.
/// - Returns: The values that are mapped from matches of `key`.
func find(key: String) -> [V] {
var currentNode = self
// Find branch with matching prefix.
for char in key {
if let child = currentNode.children[char] {
currentNode = child
} else {
return []
}
}
// Perform breadth-first search from matching branch and collect values from all descendants.
var substringMatches = Set<V>(currentNode.substringMatchValues)
var queue = Array(currentNode.children.values)
while !queue.isEmpty {
let node = queue.removeFirst()
substringMatches.formUnion(node.exactMatchValues)
substringMatches.formUnion(node.substringMatchValues)
queue.append(contentsOf: node.children.values)
}
// Prioritize exact matches to be returned first,
// and then remove exact matches from the set of partial substring matches that are appended afterward.
return Array(currentNode.exactMatchValues) + (substringMatches.subtracting(currentNode.exactMatchValues))
}
/// Inserts a value into this trie for the specified key.
/// This function stores all substring endings of the key, not only the key itself.
/// Runtime performance is O(n^2) and storage cost is O(n), where n is the number of characters in the key.
/// Note: If `key` is an empty string, this operation is effectively a no-op.
/// - Parameters:
/// - key: The key to insert that maps to `value`.
/// - value: The value that is mapped from `key`.
/// - includeNonPrefixedMatches: Whether the key and value should be inserted to allow for non-prefixed matches.
/// By default, it is `false`. If it is `true`, more memory will be used.
func insert(key: String, value: V, includeNonPrefixedMatches: Bool = false) {
// Create root branches for each character of the key to enable substring searches
// instead of only just prefix searches.
// Hence the nested loop.
for keyIndex in 0..<key.count {
var currentNode = self
// Find branch with matching prefix.
for char in key[key.index(key.startIndex, offsetBy: keyIndex)...] {
if let child = currentNode.children[char] {
currentNode = child
} else {
let child = Trie()
child.parent = currentNode
currentNode.children[char] = child
currentNode = child
}
}
if keyIndex == 0 {
currentNode.exactMatchValues.insert(value)
// If includeNonPrefixedMatches is true, the first character of the key can be the only root branch
// and we terminate the loop early.
if !includeNonPrefixedMatches {
return
}
} else {
currentNode.substringMatchValues.insert(value)
}
}
}
/// Removes a value from this trie for the specified key.
/// - Parameters:
/// - key: The key to remove.
/// - value: The value to remove.
func remove(key: String, value: V) {
for keyIndex in 0..<key.count {
var currentNode = self
var foundLeafNode = true
// Find branch with matching prefix.
for keySubIndex in keyIndex..<key.count {
let char = key[key.index(key.startIndex, offsetBy: keySubIndex)]
if let child = currentNode.children[char] {
currentNode = child
} else {
foundLeafNode = false
break
}
}
if foundLeafNode {
currentNode.exactMatchValues.remove(value)
currentNode.substringMatchValues.remove(value)
// Clean up the tree if this leaf node no longer holds values or children.
for keySubIndex in (keyIndex..<key.count).reversed() {
if let parent = currentNode.parent, !currentNode.hasValues && !currentNode.hasChildren {
currentNode = parent
let char = key[key.index(key.startIndex, offsetBy: keySubIndex)]
currentNode.children.removeValue(forKey: char)
}
}
}
}
}
}