ndb: switch to nostrdb notes

This is a refactor of the codebase to use a more memory-efficient
representation of notes. It should also be much faster at decoding since
we're using a custom C json parser now.

Changelog-Changed: Improved memory usage and performance when processing events
This commit is contained in:
William Casarin
2023-07-26 08:46:44 -07:00
parent 55bbe8f855
commit cebd1f48ca
110 changed files with 2153 additions and 1799 deletions

View File

@@ -34,7 +34,7 @@ enum NdbData {
}
}
class NdbNote: Equatable, Hashable {
class NdbNote: Encodable, Equatable, Hashable {
// we can have owned notes, but we can also have lmdb virtual-memory mapped notes so its optional
private let owned: Bool
let count: Int
@@ -52,6 +52,14 @@ class NdbNote: Equatable, Hashable {
self.note = note
self.owned = owned_size != nil
self.count = owned_size ?? 0
if let owned_size {
NdbNote.total_ndb_size += Int(owned_size)
NdbNote.notes_created += 1
print("\(NdbNote.notes_created) ndb_notes, \(NdbNote.total_ndb_size) bytes")
}
}
var content: String {
@@ -67,13 +75,17 @@ class NdbNote: Equatable, Hashable {
}
/// NDBTODO: make this into data
var id: String {
hex_encode(Data(buffer: UnsafeBufferPointer(start: ndb_note_id(note), count: 32)))
var id: NoteId {
.init(Data(bytes: ndb_note_id(note), count: 32))
}
var sig: Signature {
.init(Data(bytes: ndb_note_sig(note), count: 64))
}
/// NDBTODO: make this into data
var pubkey: String {
hex_encode(Data(buffer: UnsafeBufferPointer(start: ndb_note_pubkey(note), count: 32)))
var pubkey: Pubkey {
.init(Data(bytes: ndb_note_pubkey(note), count: 32))
}
var created_at: UInt32 {
@@ -90,6 +102,10 @@ class NdbNote: Equatable, Hashable {
deinit {
if self.owned {
NdbNote.total_ndb_size -= Int(count)
NdbNote.notes_created -= 1
print("\(NdbNote.notes_created) ndb_notes, \(NdbNote.total_ndb_size) bytes")
free(note)
}
}
@@ -102,58 +118,100 @@ class NdbNote: Equatable, Hashable {
hasher.combine(id)
}
static let max_note_size: Int = 2 << 18
private enum CodingKeys: String, CodingKey {
case id, sig, tags, pubkey, created_at, kind, content
}
// Implement the `Encodable` protocol
func encode(to encoder: Encoder) throws {
var container = encoder.container(keyedBy: CodingKeys.self)
try container.encode(hex_encode(id.id), forKey: .id)
try container.encode(hex_encode(sig.data), forKey: .sig)
try container.encode(pubkey, forKey: .pubkey)
try container.encode(created_at, forKey: .created_at)
try container.encode(kind, forKey: .kind)
try container.encode(content, forKey: .content)
try container.encode(tags, forKey: .tags)
}
static var total_ndb_size: Int = 0
static var notes_created: Int = 0
init?(content: String, keypair: Keypair, kind: UInt32 = 1, tags: [[String]] = [], createdAt: UInt32 = UInt32(Date().timeIntervalSince1970)) {
var builder = ndb_builder()
let buflen = MAX_NOTE_SIZE
let buf = malloc(buflen)
let idbuf = malloc(buflen)
ndb_builder_init(&builder, buf, Int32(buflen))
guard var pk_raw = hex_decode(keypair.pubkey) else { return nil }
var pk_raw = keypair.pubkey.bytes
ndb_builder_set_pubkey(&builder, &pk_raw)
ndb_builder_set_kind(&builder, UInt32(kind))
ndb_builder_set_created_at(&builder, createdAt)
var ok = true
for tag in tags {
ndb_builder_new_tag(&builder);
for elem in tag {
_ = elem.withCString { eptr in
ndb_builder_push_tag_str(&builder, eptr, Int32(elem.utf8.count))
ok = elem.withCString({ eptr in
return ndb_builder_push_tag_str(&builder, eptr, Int32(elem.utf8.count)) > 0
})
if !ok {
return nil
}
}
}
_ = content.withCString { cptr in
ndb_builder_set_content(&builder, content, Int32(content.utf8.count));
ok = content.withCString { cptr in
return ndb_builder_set_content(&builder, cptr, Int32(content.utf8.count)) > 0
}
if !ok {
return nil
}
var n = UnsafeMutablePointer<ndb_note>?(nil)
let keypair = keypair.privkey.map { sec in
var the_kp: ndb_keypair? = nil
if let sec = keypair.privkey {
var kp = ndb_keypair()
return sec.withCString { secptr in
ndb_decode_key(secptr, &kp)
return kp
memcpy(&kp.secret.0, sec.id.bytes, 32);
if ndb_create_keypair(&kp) <= 0 {
print("bad keypair")
} else {
the_kp = kp
}
}
var len: Int32 = 0
if var keypair {
len = ndb_builder_finalize(&builder, &n, &keypair)
if var the_kp {
len = ndb_builder_finalize(&builder, &n, &the_kp)
} else {
len = ndb_builder_finalize(&builder, &n, nil)
}
free(idbuf)
if len <= 0 {
free(buf)
return nil
}
//guard let n else { return nil }
self.owned = true
self.count = Int(len)
self.note = realloc(n, Int(len)).assumingMemoryBound(to: ndb_note.self)
//self.note = n
let r = realloc(buf, Int(len))
guard let r else {
free(buf)
return nil
}
self.note = r.assumingMemoryBound(to: ndb_note.self)
}
static func owned_from_json(json: String, bufsize: Int = 2 << 18) -> NdbNote? {
@@ -204,13 +262,8 @@ extension NdbNote {
return !too_big
}
//var is_valid_id: Bool {
// return calculate_event_id(ev: self) == self.id
//}
func get_blocks(content: String) -> Blocks {
return parse_note_content_ndb(note: self)
func get_blocks(privkey: Privkey?) -> Blocks {
return parse_note_content(content: .init(note: self, privkey: privkey))
}
func get_inner_event(cache: EventCache) -> NostrEvent? {
@@ -218,28 +271,41 @@ extension NdbNote {
return nil
}
if self.content == "", let ref = self.referenced_ids.first {
if self.content_len == 0, let id = self.referenced_ids.first {
// TODO: raw id cache lookups
let id = ref.id.string()
return cache.lookup(id)
}
// TODO: how to handle inner events?
return nil
//return self.inner_event
return self.inner_event
}
// TODO: References iterator
public var referenced_ids: LazyFilterSequence<References> {
References.ids(tags: self.tags)
public var referenced_ids: References<NoteId> {
References<NoteId>(tags: self.tags)
}
public var referenced_pubkeys: LazyFilterSequence<References> {
References.pubkeys(tags: self.tags)
public var referenced_noterefs: References<NoteRef> {
References<NoteRef>(tags: self.tags)
}
public var referenced_hashtags: LazyFilterSequence<References> {
References.hashtags(tags: self.tags)
public var referenced_follows: References<FollowRef> {
References<FollowRef>(tags: self.tags)
}
public var referenced_pubkeys: References<Pubkey> {
References<Pubkey>(tags: self.tags)
}
public var referenced_hashtags: References<Hashtag> {
References<Hashtag>(tags: self.tags)
}
public var referenced_params: References<ReplaceableParam> {
References<ReplaceableParam>(tags: self.tags)
}
public var references: References<RefId> {
References<RefId>(tags: self.tags)
}
func event_refs(_ privkey: Privkey?) -> [EventRef] {
@@ -262,7 +328,7 @@ extension NdbNote {
func blocks(_ privkey: Privkey?) -> Blocks {
if let bs = _blocks { return bs }
let blocks = get_blocks(content: self.get_content(privkey))
let blocks = get_blocks(privkey: privkey)
self._blocks = blocks
return blocks
}
@@ -273,11 +339,8 @@ extension NdbNote {
return decrypted_content
}
guard let key = privkey else {
return nil
}
guard let our_pubkey = privkey_to_pubkey(privkey: key) else {
guard let privkey,
let our_pubkey = privkey_to_pubkey(privkey: privkey) else {
return nil
}
@@ -285,37 +348,21 @@ extension NdbNote {
var pubkey = self.pubkey
// This is our DM, we need to use the pubkey of the person we're talking to instead
if our_pubkey == pubkey {
guard let refkey = self.referenced_pubkeys.first else {
return nil
}
pubkey = refkey.ref_id.string()
if our_pubkey == pubkey, let pk = self.referenced_pubkeys.first {
pubkey = pk
}
// NDBTODO: pass data to pubkey
let dec = decrypt_dm(key, pubkey: pubkey, content: self.content, encoding: .base64)
let dec = decrypt_dm(privkey, pubkey: pubkey, content: self.content, encoding: .base64)
self.decrypted_content = dec
return dec
}
/*
var description: String {
return "NostrEvent { id: \(id) pubkey \(pubkey) kind \(kind) tags \(tags) content '\(content)' }"
}
// Not sure I should implement this
private func get_referenced_ids(key: String) -> [ReferencedId] {
return damus.get_referenced_ids(tags: self.tags, key: key)
}
*/
public func direct_replies(_ privkey: Privkey?) -> [ReferencedId] {
public func direct_replies(_ privkey: Privkey?) -> [NoteId] {
return event_refs(privkey).reduce(into: []) { acc, evref in
if let direct_reply = evref.is_direct_reply {
acc.append(direct_reply)
acc.append(direct_reply.note_id)
}
}
}
@@ -324,83 +371,62 @@ extension NdbNote {
public func thread_id(privkey: Privkey?) -> NoteId {
for ref in event_refs(privkey) {
if let thread_id = ref.is_thread_id {
return thread_id.ref_id
return thread_id.note_id
}
}
return self.id
}
public func last_refid() -> ReferencedId? {
return self.referenced_ids.last?.to_referenced_id()
public func last_refid() -> NoteId? {
return self.referenced_ids.last
}
// NDBTODO: id -> data
/*
public func references(id: String, key: AsciiCharacter) -> Bool {
var matcher: (Reference) -> Bool = { ref in ref.ref_id.matches_str(id) }
if id.count == 64, let decoded = hex_decode(id) {
matcher = { ref in ref.ref_id.matches_id(decoded) }
}
for ref in References(tags: self.tags) {
if ref.key == key && ref.id.string() == id {
if ref.key == key && matcher(ref) {
return true
}
}
return false
}
*/
func is_reply(_ privkey: Privkey?) -> Bool {
return event_is_reply(self.event_refs(privkey))
}
func note_language(_ privkey: Privkey?) async -> String? {
let t = Task.detached {
// Rely on Apple's NLLanguageRecognizer to tell us which language it thinks the note is in
// and filter on only the text portions of the content as URLs and hashtags confuse the language recognizer.
let originalBlocks = self.blocks(privkey).blocks
let originalOnlyText = originalBlocks.compactMap { $0.is_text }.joined(separator: " ")
func note_language(_ privkey: Privkey?) -> String? {
assert(!Thread.isMainThread, "This function must not be run on the main thread.")
// Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
let languageRecognizer = NLLanguageRecognizer()
languageRecognizer.processString(originalOnlyText)
// Rely on Apple's NLLanguageRecognizer to tell us which language it thinks the note is in
// and filter on only the text portions of the content as URLs and hashtags confuse the language recognizer.
let originalBlocks = self.blocks(privkey).blocks
let originalOnlyText = originalBlocks.compactMap { $0.is_text }.joined(separator: " ")
guard let locale = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue else {
let nstr: String? = nil
return nstr
}
// Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
let languageRecognizer = NLLanguageRecognizer()
languageRecognizer.processString(originalOnlyText)
// Remove the variant component and just take the language part as translation services typically only supports the variant-less language.
// Moreover, speakers of one variant can generally understand other variants.
return localeToLanguage(locale)
guard let locale = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue else {
let nstr: String? = nil
return nstr
}
return await t.value
}
/*
func calculate_id() {
self.id = calculate_event_id(ev: self)
}
func sign(privkey: String) {
self.sig = sign_event(privkey: privkey, ev: self)
// Remove the variant component and just take the language part as translation services typically only supports the variant-less language.
// Moreover, speakers of one variant can generally understand other variants.
return localeToLanguage(locale)
}
var age: TimeInterval {
let event_date = Date(timeIntervalSince1970: TimeInterval(created_at))
return Date.now.timeIntervalSince(event_date)
}
*/
}
extension LazyFilterSequence {
var first: Element? {
self.first(where: { _ in true })
}
var last: Element? {
var ev: Element? = nil
for e in self {
ev = e
}
return ev
}
}