ndb: switch to nostrdb notes
This is a refactor of the codebase to use a more memory-efficient representation of notes. It should also be much faster at decoding since we're using a custom C json parser now. Changelog-Changed: Improved memory usage and performance when processing events
This commit is contained in:
@@ -34,7 +34,7 @@ enum NdbData {
|
||||
}
|
||||
}
|
||||
|
||||
class NdbNote: Equatable, Hashable {
|
||||
class NdbNote: Encodable, Equatable, Hashable {
|
||||
// we can have owned notes, but we can also have lmdb virtual-memory mapped notes so its optional
|
||||
private let owned: Bool
|
||||
let count: Int
|
||||
@@ -52,6 +52,14 @@ class NdbNote: Equatable, Hashable {
|
||||
self.note = note
|
||||
self.owned = owned_size != nil
|
||||
self.count = owned_size ?? 0
|
||||
|
||||
if let owned_size {
|
||||
NdbNote.total_ndb_size += Int(owned_size)
|
||||
NdbNote.notes_created += 1
|
||||
|
||||
print("\(NdbNote.notes_created) ndb_notes, \(NdbNote.total_ndb_size) bytes")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
var content: String {
|
||||
@@ -67,13 +75,17 @@ class NdbNote: Equatable, Hashable {
|
||||
}
|
||||
|
||||
/// NDBTODO: make this into data
|
||||
var id: String {
|
||||
hex_encode(Data(buffer: UnsafeBufferPointer(start: ndb_note_id(note), count: 32)))
|
||||
var id: NoteId {
|
||||
.init(Data(bytes: ndb_note_id(note), count: 32))
|
||||
}
|
||||
|
||||
var sig: Signature {
|
||||
.init(Data(bytes: ndb_note_sig(note), count: 64))
|
||||
}
|
||||
|
||||
/// NDBTODO: make this into data
|
||||
var pubkey: String {
|
||||
hex_encode(Data(buffer: UnsafeBufferPointer(start: ndb_note_pubkey(note), count: 32)))
|
||||
var pubkey: Pubkey {
|
||||
.init(Data(bytes: ndb_note_pubkey(note), count: 32))
|
||||
}
|
||||
|
||||
var created_at: UInt32 {
|
||||
@@ -90,6 +102,10 @@ class NdbNote: Equatable, Hashable {
|
||||
|
||||
deinit {
|
||||
if self.owned {
|
||||
NdbNote.total_ndb_size -= Int(count)
|
||||
NdbNote.notes_created -= 1
|
||||
|
||||
print("\(NdbNote.notes_created) ndb_notes, \(NdbNote.total_ndb_size) bytes")
|
||||
free(note)
|
||||
}
|
||||
}
|
||||
@@ -102,58 +118,100 @@ class NdbNote: Equatable, Hashable {
|
||||
hasher.combine(id)
|
||||
}
|
||||
|
||||
static let max_note_size: Int = 2 << 18
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case id, sig, tags, pubkey, created_at, kind, content
|
||||
}
|
||||
|
||||
// Implement the `Encodable` protocol
|
||||
func encode(to encoder: Encoder) throws {
|
||||
var container = encoder.container(keyedBy: CodingKeys.self)
|
||||
|
||||
try container.encode(hex_encode(id.id), forKey: .id)
|
||||
try container.encode(hex_encode(sig.data), forKey: .sig)
|
||||
try container.encode(pubkey, forKey: .pubkey)
|
||||
try container.encode(created_at, forKey: .created_at)
|
||||
try container.encode(kind, forKey: .kind)
|
||||
try container.encode(content, forKey: .content)
|
||||
try container.encode(tags, forKey: .tags)
|
||||
}
|
||||
|
||||
static var total_ndb_size: Int = 0
|
||||
static var notes_created: Int = 0
|
||||
|
||||
init?(content: String, keypair: Keypair, kind: UInt32 = 1, tags: [[String]] = [], createdAt: UInt32 = UInt32(Date().timeIntervalSince1970)) {
|
||||
|
||||
var builder = ndb_builder()
|
||||
let buflen = MAX_NOTE_SIZE
|
||||
let buf = malloc(buflen)
|
||||
let idbuf = malloc(buflen)
|
||||
|
||||
ndb_builder_init(&builder, buf, Int32(buflen))
|
||||
|
||||
guard var pk_raw = hex_decode(keypair.pubkey) else { return nil }
|
||||
var pk_raw = keypair.pubkey.bytes
|
||||
|
||||
ndb_builder_set_pubkey(&builder, &pk_raw)
|
||||
ndb_builder_set_kind(&builder, UInt32(kind))
|
||||
ndb_builder_set_created_at(&builder, createdAt)
|
||||
|
||||
var ok = true
|
||||
for tag in tags {
|
||||
ndb_builder_new_tag(&builder);
|
||||
for elem in tag {
|
||||
_ = elem.withCString { eptr in
|
||||
ndb_builder_push_tag_str(&builder, eptr, Int32(elem.utf8.count))
|
||||
ok = elem.withCString({ eptr in
|
||||
return ndb_builder_push_tag_str(&builder, eptr, Int32(elem.utf8.count)) > 0
|
||||
})
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ = content.withCString { cptr in
|
||||
ndb_builder_set_content(&builder, content, Int32(content.utf8.count));
|
||||
ok = content.withCString { cptr in
|
||||
return ndb_builder_set_content(&builder, cptr, Int32(content.utf8.count)) > 0
|
||||
}
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
var n = UnsafeMutablePointer<ndb_note>?(nil)
|
||||
|
||||
let keypair = keypair.privkey.map { sec in
|
||||
|
||||
var the_kp: ndb_keypair? = nil
|
||||
|
||||
if let sec = keypair.privkey {
|
||||
var kp = ndb_keypair()
|
||||
return sec.withCString { secptr in
|
||||
ndb_decode_key(secptr, &kp)
|
||||
return kp
|
||||
memcpy(&kp.secret.0, sec.id.bytes, 32);
|
||||
|
||||
if ndb_create_keypair(&kp) <= 0 {
|
||||
print("bad keypair")
|
||||
} else {
|
||||
the_kp = kp
|
||||
}
|
||||
}
|
||||
|
||||
var len: Int32 = 0
|
||||
if var keypair {
|
||||
len = ndb_builder_finalize(&builder, &n, &keypair)
|
||||
if var the_kp {
|
||||
len = ndb_builder_finalize(&builder, &n, &the_kp)
|
||||
} else {
|
||||
len = ndb_builder_finalize(&builder, &n, nil)
|
||||
}
|
||||
|
||||
free(idbuf)
|
||||
if len <= 0 {
|
||||
free(buf)
|
||||
return nil
|
||||
}
|
||||
|
||||
//guard let n else { return nil }
|
||||
|
||||
self.owned = true
|
||||
self.count = Int(len)
|
||||
self.note = realloc(n, Int(len)).assumingMemoryBound(to: ndb_note.self)
|
||||
//self.note = n
|
||||
let r = realloc(buf, Int(len))
|
||||
guard let r else {
|
||||
free(buf)
|
||||
return nil
|
||||
}
|
||||
|
||||
self.note = r.assumingMemoryBound(to: ndb_note.self)
|
||||
}
|
||||
|
||||
static func owned_from_json(json: String, bufsize: Int = 2 << 18) -> NdbNote? {
|
||||
@@ -204,13 +262,8 @@ extension NdbNote {
|
||||
return !too_big
|
||||
}
|
||||
|
||||
|
||||
//var is_valid_id: Bool {
|
||||
// return calculate_event_id(ev: self) == self.id
|
||||
//}
|
||||
|
||||
func get_blocks(content: String) -> Blocks {
|
||||
return parse_note_content_ndb(note: self)
|
||||
func get_blocks(privkey: Privkey?) -> Blocks {
|
||||
return parse_note_content(content: .init(note: self, privkey: privkey))
|
||||
}
|
||||
|
||||
func get_inner_event(cache: EventCache) -> NostrEvent? {
|
||||
@@ -218,28 +271,41 @@ extension NdbNote {
|
||||
return nil
|
||||
}
|
||||
|
||||
if self.content == "", let ref = self.referenced_ids.first {
|
||||
if self.content_len == 0, let id = self.referenced_ids.first {
|
||||
// TODO: raw id cache lookups
|
||||
let id = ref.id.string()
|
||||
return cache.lookup(id)
|
||||
}
|
||||
|
||||
// TODO: how to handle inner events?
|
||||
return nil
|
||||
//return self.inner_event
|
||||
return self.inner_event
|
||||
}
|
||||
|
||||
// TODO: References iterator
|
||||
public var referenced_ids: LazyFilterSequence<References> {
|
||||
References.ids(tags: self.tags)
|
||||
public var referenced_ids: References<NoteId> {
|
||||
References<NoteId>(tags: self.tags)
|
||||
}
|
||||
|
||||
public var referenced_pubkeys: LazyFilterSequence<References> {
|
||||
References.pubkeys(tags: self.tags)
|
||||
public var referenced_noterefs: References<NoteRef> {
|
||||
References<NoteRef>(tags: self.tags)
|
||||
}
|
||||
|
||||
public var referenced_hashtags: LazyFilterSequence<References> {
|
||||
References.hashtags(tags: self.tags)
|
||||
public var referenced_follows: References<FollowRef> {
|
||||
References<FollowRef>(tags: self.tags)
|
||||
}
|
||||
|
||||
public var referenced_pubkeys: References<Pubkey> {
|
||||
References<Pubkey>(tags: self.tags)
|
||||
}
|
||||
|
||||
public var referenced_hashtags: References<Hashtag> {
|
||||
References<Hashtag>(tags: self.tags)
|
||||
}
|
||||
|
||||
public var referenced_params: References<ReplaceableParam> {
|
||||
References<ReplaceableParam>(tags: self.tags)
|
||||
}
|
||||
|
||||
public var references: References<RefId> {
|
||||
References<RefId>(tags: self.tags)
|
||||
}
|
||||
|
||||
func event_refs(_ privkey: Privkey?) -> [EventRef] {
|
||||
@@ -262,7 +328,7 @@ extension NdbNote {
|
||||
func blocks(_ privkey: Privkey?) -> Blocks {
|
||||
if let bs = _blocks { return bs }
|
||||
|
||||
let blocks = get_blocks(content: self.get_content(privkey))
|
||||
let blocks = get_blocks(privkey: privkey)
|
||||
self._blocks = blocks
|
||||
return blocks
|
||||
}
|
||||
@@ -273,11 +339,8 @@ extension NdbNote {
|
||||
return decrypted_content
|
||||
}
|
||||
|
||||
guard let key = privkey else {
|
||||
return nil
|
||||
}
|
||||
|
||||
guard let our_pubkey = privkey_to_pubkey(privkey: key) else {
|
||||
guard let privkey,
|
||||
let our_pubkey = privkey_to_pubkey(privkey: privkey) else {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -285,37 +348,21 @@ extension NdbNote {
|
||||
var pubkey = self.pubkey
|
||||
// This is our DM, we need to use the pubkey of the person we're talking to instead
|
||||
|
||||
if our_pubkey == pubkey {
|
||||
guard let refkey = self.referenced_pubkeys.first else {
|
||||
return nil
|
||||
}
|
||||
|
||||
pubkey = refkey.ref_id.string()
|
||||
if our_pubkey == pubkey, let pk = self.referenced_pubkeys.first {
|
||||
pubkey = pk
|
||||
}
|
||||
|
||||
// NDBTODO: pass data to pubkey
|
||||
let dec = decrypt_dm(key, pubkey: pubkey, content: self.content, encoding: .base64)
|
||||
let dec = decrypt_dm(privkey, pubkey: pubkey, content: self.content, encoding: .base64)
|
||||
self.decrypted_content = dec
|
||||
|
||||
return dec
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
var description: String {
|
||||
return "NostrEvent { id: \(id) pubkey \(pubkey) kind \(kind) tags \(tags) content '\(content)' }"
|
||||
}
|
||||
|
||||
// Not sure I should implement this
|
||||
private func get_referenced_ids(key: String) -> [ReferencedId] {
|
||||
return damus.get_referenced_ids(tags: self.tags, key: key)
|
||||
}
|
||||
*/
|
||||
|
||||
public func direct_replies(_ privkey: Privkey?) -> [ReferencedId] {
|
||||
public func direct_replies(_ privkey: Privkey?) -> [NoteId] {
|
||||
return event_refs(privkey).reduce(into: []) { acc, evref in
|
||||
if let direct_reply = evref.is_direct_reply {
|
||||
acc.append(direct_reply)
|
||||
acc.append(direct_reply.note_id)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -324,83 +371,62 @@ extension NdbNote {
|
||||
public func thread_id(privkey: Privkey?) -> NoteId {
|
||||
for ref in event_refs(privkey) {
|
||||
if let thread_id = ref.is_thread_id {
|
||||
return thread_id.ref_id
|
||||
return thread_id.note_id
|
||||
}
|
||||
}
|
||||
|
||||
return self.id
|
||||
}
|
||||
|
||||
public func last_refid() -> ReferencedId? {
|
||||
return self.referenced_ids.last?.to_referenced_id()
|
||||
public func last_refid() -> NoteId? {
|
||||
return self.referenced_ids.last
|
||||
}
|
||||
|
||||
// NDBTODO: id -> data
|
||||
/*
|
||||
public func references(id: String, key: AsciiCharacter) -> Bool {
|
||||
var matcher: (Reference) -> Bool = { ref in ref.ref_id.matches_str(id) }
|
||||
if id.count == 64, let decoded = hex_decode(id) {
|
||||
matcher = { ref in ref.ref_id.matches_id(decoded) }
|
||||
}
|
||||
for ref in References(tags: self.tags) {
|
||||
if ref.key == key && ref.id.string() == id {
|
||||
if ref.key == key && matcher(ref) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
*/
|
||||
|
||||
func is_reply(_ privkey: Privkey?) -> Bool {
|
||||
return event_is_reply(self.event_refs(privkey))
|
||||
}
|
||||
|
||||
func note_language(_ privkey: Privkey?) async -> String? {
|
||||
let t = Task.detached {
|
||||
// Rely on Apple's NLLanguageRecognizer to tell us which language it thinks the note is in
|
||||
// and filter on only the text portions of the content as URLs and hashtags confuse the language recognizer.
|
||||
let originalBlocks = self.blocks(privkey).blocks
|
||||
let originalOnlyText = originalBlocks.compactMap { $0.is_text }.joined(separator: " ")
|
||||
func note_language(_ privkey: Privkey?) -> String? {
|
||||
assert(!Thread.isMainThread, "This function must not be run on the main thread.")
|
||||
|
||||
// Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
|
||||
let languageRecognizer = NLLanguageRecognizer()
|
||||
languageRecognizer.processString(originalOnlyText)
|
||||
// Rely on Apple's NLLanguageRecognizer to tell us which language it thinks the note is in
|
||||
// and filter on only the text portions of the content as URLs and hashtags confuse the language recognizer.
|
||||
let originalBlocks = self.blocks(privkey).blocks
|
||||
let originalOnlyText = originalBlocks.compactMap { $0.is_text }.joined(separator: " ")
|
||||
|
||||
guard let locale = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue else {
|
||||
let nstr: String? = nil
|
||||
return nstr
|
||||
}
|
||||
// Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
|
||||
let languageRecognizer = NLLanguageRecognizer()
|
||||
languageRecognizer.processString(originalOnlyText)
|
||||
|
||||
// Remove the variant component and just take the language part as translation services typically only supports the variant-less language.
|
||||
// Moreover, speakers of one variant can generally understand other variants.
|
||||
return localeToLanguage(locale)
|
||||
guard let locale = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue else {
|
||||
let nstr: String? = nil
|
||||
return nstr
|
||||
}
|
||||
|
||||
return await t.value
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
func calculate_id() {
|
||||
self.id = calculate_event_id(ev: self)
|
||||
}
|
||||
|
||||
func sign(privkey: String) {
|
||||
self.sig = sign_event(privkey: privkey, ev: self)
|
||||
// Remove the variant component and just take the language part as translation services typically only supports the variant-less language.
|
||||
// Moreover, speakers of one variant can generally understand other variants.
|
||||
return localeToLanguage(locale)
|
||||
}
|
||||
|
||||
var age: TimeInterval {
|
||||
let event_date = Date(timeIntervalSince1970: TimeInterval(created_at))
|
||||
return Date.now.timeIntervalSince(event_date)
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
extension LazyFilterSequence {
|
||||
var first: Element? {
|
||||
self.first(where: { _ in true })
|
||||
}
|
||||
|
||||
var last: Element? {
|
||||
var ev: Element? = nil
|
||||
for e in self {
|
||||
ev = e
|
||||
}
|
||||
return ev
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user