stillbox/internal/trending/item.go

106 lines
2.7 KiB
Go
Raw Normal View History

2024-10-22 23:44:40 -04:00
package trending
import (
"math"
"time"
timeseries "dynatron.me/x/stillbox/internal/timeseries"
2024-10-22 23:44:40 -04:00
)
type item[K comparable] struct {
2024-10-22 23:44:40 -04:00
eventSeries TimeSeries
maxSeries SlidingWindow
max float64
maxTime time.Time
options *options[K]
2024-10-22 23:44:40 -04:00
// TODO: move outside of item because it's the same for all items
defaultExpectation float64
defaultHourlyCount float64
}
func newItem[K comparable](id K, options *options[K]) *item[K] {
2024-10-22 23:44:40 -04:00
defaultHourlyCount := float64(options.baseCount) * float64(options.storageDuration/time.Hour)
defaultExpectation := float64(options.baseCount) / float64(time.Hour/options.recentDuration)
return &item[K]{
2024-10-22 23:44:40 -04:00
eventSeries: options.creator(id),
maxSeries: options.slidingWindowCreator(id),
options: options,
defaultExpectation: defaultExpectation,
defaultHourlyCount: defaultHourlyCount,
}
}
func (i *item[K]) score() Score[K] {
2024-10-22 23:44:40 -04:00
recentCount, count := i.computeCounts()
if recentCount < i.options.countThreshold {
return Score[K]{}
2024-10-22 23:44:40 -04:00
}
if recentCount == count {
// we see this for the first time so there is no historical data
// use a sensible default like average/median over all items
count = recentCount + i.defaultHourlyCount
}
probability := recentCount / count
// order of those two lines is important.
// if we insert before reading we might just get the same value.
expectation := i.computeRecentMax()
i.maxSeries.Insert(probability)
if expectation == 0.0 {
expectation = i.defaultExpectation
}
klScore := computeKullbackLeibler(probability, expectation)
if klScore > i.max {
i.updateMax(klScore)
}
i.decayMax()
mixedScore := 5 * (klScore + i.max)
2024-10-22 23:44:40 -04:00
return Score[K]{
2024-10-22 23:44:40 -04:00
Score: mixedScore,
Probability: probability,
Expectation: expectation,
Maximum: i.max,
KLScore: klScore,
Count: count,
RecentCount: recentCount,
2024-10-22 23:44:40 -04:00
}
}
func (i *item[K]) computeCounts() (float64, float64) {
now := timeseries.DefaultClock.Now()
2024-10-22 23:44:40 -04:00
totalCount, _ := i.eventSeries.Range(now.Add(-i.options.storageDuration), now)
count, _ := i.eventSeries.Range(now.Add(-i.options.recentDuration), now)
return count, totalCount
}
func (i *item[K]) computeRecentMax() float64 {
2024-10-22 23:44:40 -04:00
return i.maxSeries.Max()
}
func (i *item[K]) decayMax() {
2024-10-22 23:44:40 -04:00
i.updateMax(i.max * i.computeExponentialDecayMultiplier())
}
func (i *item[K]) updateMax(score float64) {
2024-10-22 23:44:40 -04:00
i.max = score
i.maxTime = timeseries.DefaultClock.Now()
2024-10-22 23:44:40 -04:00
}
func (i *item[K]) computeExponentialDecayMultiplier() float64 {
return math.Pow(0.5, float64(timeseries.DefaultClock.Now().Unix()-i.maxTime.Unix())/i.options.halfLife.Seconds())
2024-10-22 23:44:40 -04:00
}
// computeKullbackLeibler returns probability * ln(probability / expectation),
// the single-outcome term of the Kullback-Leibler divergence.
//
// A probability of exactly 0 yields 0 without evaluating the logarithm.
func computeKullbackLeibler(probability float64, expectation float64) float64 {
	if probability != 0.0 {
		ratio := probability / expectation
		return probability * math.Log(ratio)
	}
	return 0.0
}