pzp-conductor/lib/index.js

291 lines
8.7 KiB
JavaScript

const makeDebug = require('debug')
const MsgV4 = require('pzp-db/msg-v4')
/**
* @typedef {ReturnType<import('pzp-db').init>} PZPDB
* @typedef {ReturnType<import('pzp-goals').init>} PZPGoal
* @typedef {import('pzp-goals').GoalDSL} GoalDSL
* @typedef {ReturnType<import('pzp-set').init>} PZPSet
* @typedef {ReturnType<import('pzp-dict').init>} PZPDict
* @typedef {ReturnType<import('pzp-sync').init>} PZPSync
* @typedef {ReturnType<import('pzp-gc').init>} PZPGC
* @typedef {`${string}@${GoalDSL}`} Rule
* @typedef {[Array<Rule>, Array<Rule>]} Rules
* @typedef {{
* db: PZPDB,
* goals: PZPGoal,
* set: PZPSet,
* sync: PZPSync,
* gc: PZPGC,
* dict: PZPDict | null,
* }} Peer
*/
/**
* @template T
* @typedef {T extends void ?
* (...args: [Error] | []) => void :
* (...args: [Error] | [null, T]) => void
* } CB
*/
/**
* @param {any} rule
* @returns {[string, GoalDSL]}
*/
function parseRule(rule) {
if (typeof rule !== 'string') throw new Error('rule must be a string')
if (!rule) throw new Error('rule must not be empty')
if (!rule.includes('@')) throw new Error('rule fit the format "domain@goal"')
const splitted = /**@type {[string, GoalDSL]}*/ (rule.split('@'))
if (!splitted[0]) throw new Error('rule must fit the format "domain@goal"')
if (!splitted[1]) throw new Error('rule must fit the format "domain@goal"')
return splitted
}
/**
* @param {Peer} peer
* @param {unknown} config
*/
function initConductor(peer, config) {
/**
* How many bytes does a single msg ID take up
*/
const MSG_ID_BYTES = MsgV4.getMootID('dummy', 'dummy').length
/**
* How many bytes does an average msg take up
*/
const ESTIMATE_MSG_SIZE = 600 // 600 bytes
/**
* How many bytes should we budget for ghost msg IDs in total in the database
*/
const ESTIMATE_TOTAL_GHOST_BYTES = 1024 * 1024 // 1 MB
/**
* How many msgs does the average 'follows' Set feed contain
*/
const ESTIMATE_FOLLOWS_FEED_SIZE = 300
/**
* How many msgs does the average 'block' Set feed contain
*/
const ESTIMATE_BLOCK_FEED_SIZE = 30
/**
* How many msgs does the average unknown feed contain
*/
const ESTIMATE_FEED_SIZE = 100
/**
* Absolute minimum acceptable maxBytes parameter
*/
const MIN_MAXBYTES = 1024 // 1 kB
/**
* Lower bound for somewhat acceptable maxBytes parameter
*/
const MIN_DECENT_MAXBYTES = 32 * 1024 * 1024 // 32 MB
/**
* Recommended maxBytes parameter
*/
const RECOMMENDED_MAXBYTES = 64 * 1024 * 1024 // 64 MB
/**
* Upper bound for somewhat acceptable maxBytes parameter
*/
const MAX_DECENT_MAXBYTES = 100 * 1024 * 1024 // 100 MB
const debug = makeDebug('pzp:conductor')
/**
* @param {Array<Rule>} rules
*/
function countGhostableFeeds(rules) {
let count = 2 // 'follows' and 'blocks' Sets
for (const rule of rules) {
const [, goalDSL] = parseRule(rule)
if (goalDSL === 'dict') count++
else if (goalDSL === 'set') count++
}
return count
}
/**
* @param {Rule} rule
*/
function getRealisticCount(rule) {
const [, goalDSL] = parseRule(rule)
const { count } = peer.goals.parse(goalDSL)
const realisticCount = isFinite(count)
? count
: Math.min(count, ESTIMATE_FEED_SIZE)
return realisticCount
}
/**
* Parses input rules. If goals are too big for maxBytes budget, scale down
* goals.
*
* @param {[Array<Rule>, Array<Rule>]} rules
* @param {number} numFollowed
* @param {number} maxBytes
* @returns {[Array<Rule>, Array<Rule>]}
*/
function validateRules(rules, numFollowed, maxBytes) {
// prettier-ignore
if (!Array.isArray(rules)) throw new Error('conductor.start expects array of rules')
const myRules = rules[0] ?? []
const theirRules = rules[1] ?? []
let estimateMsgCount =
(1 + numFollowed) * ESTIMATE_FOLLOWS_FEED_SIZE + ESTIMATE_BLOCK_FEED_SIZE
for (const rule of myRules) {
estimateMsgCount += getRealisticCount(rule)
}
for (const rule of theirRules) {
estimateMsgCount += numFollowed * getRealisticCount(rule)
}
const estimateBytesUsed = estimateMsgCount * ESTIMATE_MSG_SIZE
const factor = maxBytes / estimateBytesUsed
if (estimateBytesUsed > maxBytes) {
if (maxBytes < MIN_DECENT_MAXBYTES) {
// prettier-ignore
debug('WARNING. maxBytes is in practice too small, we recommend at least %s bytes, ideally %s bytes, and at most %s bytes', MIN_DECENT_MAXBYTES, RECOMMENDED_MAXBYTES, MAX_DECENT_MAXBYTES)
} else {
// prettier-ignore
debug('WARNING. maxBytes might be easily surpassed, you should downscale rules to %s%', (factor*100).toFixed(0))
}
}
return [myRules, theirRules]
}
/**
* @param {string} accountID
* @param {Rule} rule
* @return {[string, GoalDSL]}
*/
function materializeRule(accountID, rule) {
const [subdomain, goalDSL] = parseRule(rule)
// prettier-ignore
if (goalDSL === 'dict' && !peer.dict) throw new Error('Cannot parse dict rule because dict plugin is not installed')
const domain =
goalDSL === 'set'
? peer.set.getDomain(subdomain)
: goalDSL === 'dict' && peer.dict
? peer.dict.getDomain(subdomain)
: subdomain
const feedID = peer.db.feed.getID(accountID, domain)
return [feedID, goalDSL]
}
/**
* Set replication goals for various tangles of an account:
* - Account tangle
* - Follows tangle (a Set)
* - Blocks tangle (a Set)
* - Each tangle in the rule
*
* The "rule" is just a list of domains of feeds.
* @param {string} accountID ID of the account to set goals for
* @param {Array<Rule>} rules list of feed domains of interest
*/
function setupAccountGoals(accountID, rules) {
peer.goals.set(accountID, 'all')
/** @type {Array<Rule>} */
const allRules = ['follows@set', 'blocks@set', ...rules]
for (const rule of allRules) {
const [feedID, goalDSL] = materializeRule(accountID, rule)
peer.goals.set(feedID, goalDSL)
}
// prettier-ignore
debug('Setup goals for %s@all, %s@set, %s@set, %s', accountID, allRules.join(', '))
}
/**
* @param {string} accountID
* @param {Array<Rule>} rules
*/
function teardownAccountGoals(accountID, rules) {
peer.goals.set(accountID, 'none')
/** @type {Array<Rule>} */
const allRules = ['follows@set', 'blocks@set', ...rules]
for (const rule of allRules) {
const [feedID] = materializeRule(accountID, rule)
peer.goals.set(feedID, 'none')
}
// prettier-ignore
debug('Teardown goals for %s@all, %s@set, %s@set, %s', accountID, allRules.join(', '))
}
/**
* Starts automatic sync and garbage collection.
* Assumes that PZP Set has been loaded with the same accountID.
*
* @param {string} myID
* @param {[Array<Rule>, Array<Rule>]} rules
* @param {number} maxBytes
* @param {CB<void>} cb
*/
function start(myID, rules, maxBytes, cb) {
if (maxBytes < MIN_MAXBYTES) {
// prettier-ignore
return cb(Error(`pzp-conductor maxBytes must be at least ${MIN_MAXBYTES} bytes, got ${maxBytes}`))
}
if (maxBytes > MAX_DECENT_MAXBYTES) {
// prettier-ignore
debug('WARNING. maxBytes is too big, we recommend at most %s bytes', MAX_DECENT_MAXBYTES)
}
peer.set.values('follows', null, (err, follows) => {
if (err) return cb(err)
const numFollows = follows.length
const [myRules, theirRules] = validateRules(rules, numFollows, maxBytes)
// Set up goals for my account and each account I follow
setupAccountGoals(myID, myRules)
for (const theirID of follows) {
setupAccountGoals(theirID, theirRules)
}
// @ts-ignore
peer.set.watch(({ event, subdomain, value }) => {
const theirID = value
if (subdomain === 'follows' && event === 'add') {
setupAccountGoals(theirID, theirRules)
}
if (subdomain === 'follows' && event === 'del') {
teardownAccountGoals(theirID, theirRules)
}
if (subdomain === 'blocks' && event === 'add') {
teardownAccountGoals(theirID, theirRules)
}
})
// Figure out ghost span for each account
const totalGhostableFeeds =
countGhostableFeeds(myRules) +
numFollows * countGhostableFeeds(theirRules)
const TOTAL_GHOSTS = ESTIMATE_TOTAL_GHOST_BYTES / MSG_ID_BYTES
const ghostSpan = Math.round(TOTAL_GHOSTS / totalGhostableFeeds)
peer.set.setGhostSpan(ghostSpan)
peer.dict?.setGhostSpan(ghostSpan)
// Kick off garbage collection and synchronization
peer.gc.start(maxBytes)
peer.sync.start()
cb()
})
}
return {
start,
}
}
exports.name = 'conductor'
exports.needs = ['db', 'goals', 'set', 'gc', 'sync']
exports.init = initConductor