Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
4c50bff6c1
|
|||
|
0bb65eec3d
|
@@ -13,13 +13,18 @@ let package = Package(
|
|||||||
],
|
],
|
||||||
dependencies: [
|
dependencies: [
|
||||||
// Dependencies declare other packages that this package depends on.
|
// Dependencies declare other packages that this package depends on.
|
||||||
.package(url: "https://github.com/apple/swift-docc-plugin.git", from: "1.3.0")
|
.package(url: "https://github.com/apple/swift-docc-plugin.git", from: "1.3.0"),
|
||||||
|
.package(url: "https://github.com/apple/swift-collections.git", .upToNextMajor(from: "1.1.1"))
|
||||||
],
|
],
|
||||||
targets: [
|
targets: [
|
||||||
// Targets are the basic building blocks of a package, defining a module or a test suite.
|
// Targets are the basic building blocks of a package, defining a module or a test suite.
|
||||||
// Targets can depend on other targets in this package and products from dependencies.
|
// Targets can depend on other targets in this package and products from dependencies.
|
||||||
.target(
|
.target(
|
||||||
name: "SwiftTrie"),
|
name: "SwiftTrie",
|
||||||
|
dependencies: [
|
||||||
|
.product(name: "OrderedCollections", package: "swift-collections")
|
||||||
|
]
|
||||||
|
),
|
||||||
.testTarget(
|
.testTarget(
|
||||||
name: "SwiftTrieTests",
|
name: "SwiftTrieTests",
|
||||||
dependencies: ["SwiftTrie"])
|
dependencies: ["SwiftTrie"])
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
//
|
//
|
||||||
|
|
||||||
import Foundation
|
import Foundation
|
||||||
|
import OrderedCollections
|
||||||
|
|
||||||
/// Trie is a tree data structure of all the substring permutations of a collection of strings
|
/// Trie is a tree data structure of all the substring permutations of a collection of strings
|
||||||
/// optimized for searching for values of type V.
|
/// optimized for searching for values of type V.
|
||||||
@@ -22,11 +23,11 @@ import Foundation
|
|||||||
///
|
///
|
||||||
/// See the article on [Trie](https://en.wikipedia.org/wiki/Trie) on Wikipedia.
|
/// See the article on [Trie](https://en.wikipedia.org/wiki/Trie) on Wikipedia.
|
||||||
public class Trie<V: Hashable> {
|
public class Trie<V: Hashable> {
|
||||||
private var children: [Character: Trie] = [:]
|
private var children = OrderedDictionary<Character, Trie>()
|
||||||
|
|
||||||
/// Separate exact matches from strict substrings so that exact matches appear first in returned results.
|
/// Separate exact matches from strict substrings so that exact matches appear first in returned results.
|
||||||
private var exactMatchValues = Set<V>()
|
private var exactMatchValues = OrderedSet<V>()
|
||||||
private var substringMatchValues = Set<V>()
|
private var substringMatchValues = OrderedSet<V>()
|
||||||
|
|
||||||
private var parent: Trie?
|
private var parent: Trie?
|
||||||
|
|
||||||
@@ -41,6 +42,25 @@ public class Trie<V: Hashable> {
|
|||||||
public init() { }
|
public init() { }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The transformation options that can be applied to the original key when inserting a value into a trie
|
||||||
|
/// as additional keys that map to the value.
|
||||||
|
public struct TrieInsertionOptions: OptionSet {
|
||||||
|
public let rawValue: Int
|
||||||
|
|
||||||
|
public init(rawValue: Int) {
|
||||||
|
self.rawValue = rawValue
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Also inserts every suffix of the original key so that non-prefixed (substring) searches can match it.
|
||||||
|
public static let includeNonPrefixedMatches = TrieInsertionOptions(rawValue: 1 << 0)
|
||||||
|
|
||||||
|
/// Inserts the localized lowercase version of the original key.
|
||||||
|
public static let includeCaseInsensitiveMatches = TrieInsertionOptions(rawValue: 1 << 1)
|
||||||
|
|
||||||
|
/// Inserts the original key with all diacritics removed.
|
||||||
|
public static let includeDiacriticsInsensitiveMatches = TrieInsertionOptions(rawValue: 1 << 2)
|
||||||
|
}
|
||||||
|
|
||||||
public extension Trie {
|
public extension Trie {
|
||||||
/// Finds the branch that matches the specified key and returns the values from all of its descendant nodes.
|
/// Finds the branch that matches the specified key and returns the values from all of its descendant nodes.
|
||||||
/// Note: If `key` is an empty string, all values are returned.
|
/// Note: If `key` is an empty string, all values are returned.
|
||||||
@@ -60,7 +80,7 @@ public extension Trie {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Perform breadth-first search from matching branch and collect values from all descendants.
|
// Perform breadth-first search from matching branch and collect values from all descendants.
|
||||||
var substringMatches = Set<V>(currentNode.substringMatchValues)
|
var substringMatches = OrderedSet<V>(currentNode.substringMatchValues)
|
||||||
var queue = Array(currentNode.children.values)
|
var queue = Array(currentNode.children.values)
|
||||||
|
|
||||||
while !queue.isEmpty {
|
while !queue.isEmpty {
|
||||||
@@ -75,6 +95,7 @@ public extension Trie {
|
|||||||
return Array(currentNode.exactMatchValues) + (substringMatches.subtracting(currentNode.exactMatchValues))
|
return Array(currentNode.exactMatchValues) + (substringMatches.subtracting(currentNode.exactMatchValues))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// swiftlint:disable cyclomatic_complexity
|
||||||
/// Inserts a value into this trie for the specified key.
|
/// Inserts a value into this trie for the specified key.
|
||||||
/// This function stores all substring endings of the key, not only the key itself.
|
/// This function stores all substring endings of the key, not only the key itself.
|
||||||
/// Runtime performance is O(n^2) and storage cost is O(n), where n is the number of characters in the key.
|
/// Runtime performance is O(n^2) and storage cost is O(n), where n is the number of characters in the key.
|
||||||
@@ -82,9 +103,34 @@ public extension Trie {
|
|||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - key: The key to insert that maps to `value`.
|
/// - key: The key to insert that maps to `value`.
|
||||||
/// - value: The value that is mapped from `key`.
|
/// - value: The value that is mapped from `key`.
|
||||||
/// - includeNonPrefixedMatches: Whether the key and value should be inserted to allow for non-prefixed matches.
|
/// - options: The options controlling which additional forms of `key` are inserted alongside the original.
|
||||||
/// By default, it is `false`. If it is `true`, more memory will be used.
|
/// - Returns: The list of whole keys that were inserted that map to `value`.
|
||||||
func insert(key: String, value: V, includeNonPrefixedMatches: Bool = false) {
|
func insert(key originalKey: String, value: V, options: TrieInsertionOptions = []) -> [String] {
|
||||||
|
let includeNonPrefixedMatches = options.contains(.includeNonPrefixedMatches)
|
||||||
|
let includeCaseInsensitiveMatches = options.contains(.includeCaseInsensitiveMatches)
|
||||||
|
let includeDiacriticsInsensitiveMatches = options.contains(.includeDiacriticsInsensitiveMatches)
|
||||||
|
|
||||||
|
var keys = [originalKey]
|
||||||
|
if includeCaseInsensitiveMatches {
|
||||||
|
let localizedLowercase = originalKey.localizedLowercase
|
||||||
|
if localizedLowercase != originalKey {
|
||||||
|
keys.append(localizedLowercase)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if includeDiacriticsInsensitiveMatches,
|
||||||
|
let keyWithoutDiacritics = originalKey.applyingTransform(.stripDiacritics, reverse: false),
|
||||||
|
keyWithoutDiacritics != originalKey {
|
||||||
|
keys.append(keyWithoutDiacritics)
|
||||||
|
|
||||||
|
if includeCaseInsensitiveMatches {
|
||||||
|
let localizedLowercaseWithoutDiacritics = keyWithoutDiacritics.localizedLowercase
|
||||||
|
if localizedLowercaseWithoutDiacritics != originalKey {
|
||||||
|
keys.append(localizedLowercaseWithoutDiacritics)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for key in keys {
|
||||||
// Create root branches for each character of the key to enable substring searches
|
// Create root branches for each character of the key to enable substring searches
|
||||||
// instead of only just prefix searches.
|
// instead of only just prefix searches.
|
||||||
// Hence the nested loop.
|
// Hence the nested loop.
|
||||||
@@ -104,19 +150,23 @@ public extension Trie {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if keyIndex == 0 {
|
if keyIndex == 0 {
|
||||||
currentNode.exactMatchValues.insert(value)
|
currentNode.exactMatchValues.append(value)
|
||||||
|
|
||||||
// If includeNonPrefixedMatches is true, the first character of the key can be the only root branch
|
// If includeNonPrefixedMatches is true, the first character of the key can be the only root branch
|
||||||
// and we terminate the loop early.
|
// and we terminate the loop early.
|
||||||
if !includeNonPrefixedMatches {
|
if !includeNonPrefixedMatches {
|
||||||
return
|
break
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
currentNode.substringMatchValues.insert(value)
|
currentNode.substringMatchValues.append(value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
// swiftlint:enable cyclomatic_complexity
|
||||||
|
|
||||||
/// Removes a value from this trie for the specified key.
|
/// Removes a value from this trie for the specified key.
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - key: The key to remove.
|
/// - key: The key to remove.
|
||||||
|
|||||||
@@ -13,13 +13,13 @@ final class TrieTests: XCTestCase {
|
|||||||
func testFindPrefixedMatches() throws {
|
func testFindPrefixedMatches() throws {
|
||||||
let trie = Trie<String>()
|
let trie = Trie<String>()
|
||||||
|
|
||||||
let keys = ["foobar", "food", "foo", "somethingelse", "duplicate", "duplicate"]
|
let keys = ["foobar", "food", "foo", "somethingelse", "duplicate", "duplicate", "first: second", "août"]
|
||||||
keys.forEach {
|
keys.forEach {
|
||||||
trie.insert(key: $0, value: $0)
|
XCTAssertEqual(trie.insert(key: $0, value: $0), [$0])
|
||||||
}
|
}
|
||||||
|
|
||||||
let allResults = trie.find(key: "")
|
let allResults = trie.find(key: "")
|
||||||
XCTAssertEqual(Set(allResults), Set(["foobar", "food", "foo", "somethingelse", "duplicate"]))
|
XCTAssertEqual(Set(allResults), Set(keys))
|
||||||
|
|
||||||
let fooResults = trie.find(key: "foo")
|
let fooResults = trie.find(key: "foo")
|
||||||
XCTAssertEqual(fooResults.first, "foo")
|
XCTAssertEqual(fooResults.first, "foo")
|
||||||
@@ -29,11 +29,20 @@ final class TrieTests: XCTestCase {
|
|||||||
XCTAssertEqual(foodResults, ["food"])
|
XCTAssertEqual(foodResults, ["food"])
|
||||||
|
|
||||||
let ooResults = trie.find(key: "oo")
|
let ooResults = trie.find(key: "oo")
|
||||||
XCTAssertEqual(Set(ooResults), Set([]))
|
XCTAssertEqual(ooResults, [])
|
||||||
|
|
||||||
|
let multipleWordsResults = trie.find(key: "second")
|
||||||
|
XCTAssertEqual(multipleWordsResults, [])
|
||||||
|
|
||||||
let notFoundResults = trie.find(key: "notfound")
|
let notFoundResults = trie.find(key: "notfound")
|
||||||
XCTAssertEqual(notFoundResults, [])
|
XCTAssertEqual(notFoundResults, [])
|
||||||
|
|
||||||
|
let caseSensitiveResults = trie.find(key: "FOO")
|
||||||
|
XCTAssertEqual(caseSensitiveResults, [])
|
||||||
|
|
||||||
|
let diacriticResults = trie.find(key: "aout")
|
||||||
|
XCTAssertEqual(diacriticResults, [])
|
||||||
|
|
||||||
// Sanity check that the root node has children.
|
// Sanity check that the root node has children.
|
||||||
XCTAssertTrue(trie.hasChildren)
|
XCTAssertTrue(trie.hasChildren)
|
||||||
|
|
||||||
@@ -44,13 +53,13 @@ final class TrieTests: XCTestCase {
|
|||||||
func testFindNonPrefixedMatches() throws {
|
func testFindNonPrefixedMatches() throws {
|
||||||
let trie = Trie<String>()
|
let trie = Trie<String>()
|
||||||
|
|
||||||
let keys = ["foobar", "food", "foo", "somethingelse", "duplicate", "duplicate"]
|
let keys = ["foobar", "food", "foo", "somethingelse", "duplicate", "duplicate", "first: second", "août"]
|
||||||
keys.forEach {
|
keys.forEach {
|
||||||
trie.insert(key: $0, value: $0, includeNonPrefixedMatches: true)
|
XCTAssertEqual(trie.insert(key: $0, value: $0, options: [.includeNonPrefixedMatches]), [$0])
|
||||||
}
|
}
|
||||||
|
|
||||||
let allResults = trie.find(key: "")
|
let allResults = trie.find(key: "")
|
||||||
XCTAssertEqual(Set(allResults), Set(["foobar", "food", "foo", "somethingelse", "duplicate"]))
|
XCTAssertEqual(Set(allResults), Set(keys))
|
||||||
|
|
||||||
let fooResults = trie.find(key: "foo")
|
let fooResults = trie.find(key: "foo")
|
||||||
XCTAssertEqual(fooResults.first, "foo")
|
XCTAssertEqual(fooResults.first, "foo")
|
||||||
@@ -62,12 +71,87 @@ final class TrieTests: XCTestCase {
|
|||||||
let ooResults = trie.find(key: "oo")
|
let ooResults = trie.find(key: "oo")
|
||||||
XCTAssertEqual(Set(ooResults), Set(["foobar", "food", "foo"]))
|
XCTAssertEqual(Set(ooResults), Set(["foobar", "food", "foo"]))
|
||||||
|
|
||||||
|
let multipleWordsResults = trie.find(key: "second")
|
||||||
|
XCTAssertEqual(multipleWordsResults, ["first: second"])
|
||||||
|
|
||||||
let aResults = trie.find(key: "a")
|
let aResults = trie.find(key: "a")
|
||||||
XCTAssertEqual(Set(aResults), Set(["foobar", "duplicate"]))
|
XCTAssertEqual(Set(aResults), Set(["foobar", "duplicate", "août"]))
|
||||||
|
|
||||||
let notFoundResults = trie.find(key: "notfound")
|
let notFoundResults = trie.find(key: "notfound")
|
||||||
XCTAssertEqual(notFoundResults, [])
|
XCTAssertEqual(notFoundResults, [])
|
||||||
|
|
||||||
|
let caseSensitiveResults = trie.find(key: "FOO")
|
||||||
|
XCTAssertEqual(caseSensitiveResults, [])
|
||||||
|
|
||||||
|
let diacriticResults = trie.find(key: "aout")
|
||||||
|
XCTAssertEqual(diacriticResults, [])
|
||||||
|
|
||||||
|
// Sanity check that the root node has children.
|
||||||
|
XCTAssertTrue(trie.hasChildren)
|
||||||
|
|
||||||
|
// Sanity check that the root node has no values.
|
||||||
|
XCTAssertFalse(trie.hasValues)
|
||||||
|
}
|
||||||
|
|
||||||
|
func testFindCaseInsensitive() throws {
|
||||||
|
let trie = Trie<String>()
|
||||||
|
|
||||||
|
let key = "FoObAr"
|
||||||
|
XCTAssertEqual(trie.insert(key: key, value: key, options: [.includeCaseInsensitiveMatches]), [key, "foobar"])
|
||||||
|
|
||||||
|
let allResults = trie.find(key: "")
|
||||||
|
XCTAssertEqual(Set(allResults), Set([key]))
|
||||||
|
|
||||||
|
let fooResults = trie.find(key: "foo")
|
||||||
|
XCTAssertEqual(fooResults, [key])
|
||||||
|
|
||||||
|
// Sanity check that the root node has children.
|
||||||
|
XCTAssertTrue(trie.hasChildren)
|
||||||
|
|
||||||
|
// Sanity check that the root node has no values.
|
||||||
|
XCTAssertFalse(trie.hasValues)
|
||||||
|
}
|
||||||
|
|
||||||
|
func testFindDiacriticInsensitive() throws {
|
||||||
|
let trie = Trie<String>()
|
||||||
|
|
||||||
|
let key = "Laïcité"
|
||||||
|
XCTAssertEqual(
|
||||||
|
trie.insert(key: key, value: key, options: [.includeDiacriticsInsensitiveMatches]),
|
||||||
|
[key, "Laicite"]
|
||||||
|
)
|
||||||
|
|
||||||
|
let allResults = trie.find(key: "")
|
||||||
|
XCTAssertEqual(Set(allResults), Set([key]))
|
||||||
|
|
||||||
|
let laiciteResults = trie.find(key: "Laicite")
|
||||||
|
XCTAssertEqual(laiciteResults, [key])
|
||||||
|
|
||||||
|
// Sanity check that the root node has children.
|
||||||
|
XCTAssertTrue(trie.hasChildren)
|
||||||
|
|
||||||
|
// Sanity check that the root node has no values.
|
||||||
|
XCTAssertFalse(trie.hasValues)
|
||||||
|
}
|
||||||
|
|
||||||
|
func testFindCaseAndDiacriticInsensitive() throws {
|
||||||
|
let trie = Trie<String>()
|
||||||
|
|
||||||
|
let key = "Laïcité"
|
||||||
|
XCTAssertEqual(
|
||||||
|
trie.insert(
|
||||||
|
key: key,
|
||||||
|
value: key,
|
||||||
|
options: [.includeCaseInsensitiveMatches, .includeDiacriticsInsensitiveMatches]
|
||||||
|
),
|
||||||
|
[key, "laïcité", "Laicite", "laicite"])
|
||||||
|
|
||||||
|
let allResults = trie.find(key: "")
|
||||||
|
XCTAssertEqual(Set(allResults), Set([key]))
|
||||||
|
|
||||||
|
let laiciteResults = trie.find(key: "laicite")
|
||||||
|
XCTAssertEqual(laiciteResults, [key])
|
||||||
|
|
||||||
// Sanity check that the root node has children.
|
// Sanity check that the root node has children.
|
||||||
XCTAssertTrue(trie.hasChildren)
|
XCTAssertTrue(trie.hasChildren)
|
||||||
|
|
||||||
@@ -78,13 +162,26 @@ final class TrieTests: XCTestCase {
|
|||||||
func testRemove() {
|
func testRemove() {
|
||||||
let trie = Trie<String>()
|
let trie = Trie<String>()
|
||||||
|
|
||||||
let keys = ["foobar", "food", "foo", "somethingelse", "duplicate", "duplicate"]
|
let keys = ["FoObAr", "FOOD", "foo", "Sométhingëlse", "duplicate", "duplicate"]
|
||||||
|
var insertedKeysMap = [String: [String]]()
|
||||||
keys.forEach {
|
keys.forEach {
|
||||||
trie.insert(key: $0, value: $0)
|
insertedKeysMap[$0] = trie.insert(key: $0, value: $0,
|
||||||
|
options: [
|
||||||
|
.includeNonPrefixedMatches,
|
||||||
|
.includeCaseInsensitiveMatches,
|
||||||
|
.includeDiacriticsInsensitiveMatches
|
||||||
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
keys.forEach {
|
XCTAssertEqual(
|
||||||
trie.remove(key: $0, value: $0)
|
Set(insertedKeysMap.values.reduce([], +)),
|
||||||
|
Set(keys + ["foobar", "food", "Somethingelse", "somethingelse", "sométhingëlse"])
|
||||||
|
)
|
||||||
|
|
||||||
|
insertedKeysMap.forEach { originalKey, insertedKeys in
|
||||||
|
insertedKeys.forEach { insertedKey in
|
||||||
|
trie.remove(key: insertedKey, value: originalKey)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let allResults = trie.find(key: "")
|
let allResults = trie.find(key: "")
|
||||||
|
|||||||
Reference in New Issue
Block a user