2024-10-22 23:44:40 -04:00
|
|
|
package trending
|
|
|
|
|
|
|
|
import (
|
|
|
|
"math"
|
|
|
|
"time"
|
2024-10-23 08:55:19 -04:00
|
|
|
|
|
|
|
timeseries "dynatron.me/x/stillbox/internal/timeseries"
|
2024-10-22 23:44:40 -04:00
|
|
|
)
|
|
|
|
|
2024-10-23 08:55:19 -04:00
|
|
|
type item[K comparable] struct {
|
2024-10-22 23:44:40 -04:00
|
|
|
eventSeries TimeSeries
|
|
|
|
maxSeries SlidingWindow
|
|
|
|
|
|
|
|
max float64
|
|
|
|
maxTime time.Time
|
2024-10-23 08:55:19 -04:00
|
|
|
options *options[K]
|
2024-10-22 23:44:40 -04:00
|
|
|
|
|
|
|
// TODO: move outside of item because it's the same for all items
|
|
|
|
defaultExpectation float64
|
|
|
|
defaultHourlyCount float64
|
|
|
|
}
|
|
|
|
|
2024-10-23 08:55:19 -04:00
|
|
|
func newItem[K comparable](id K, options *options[K]) *item[K] {
|
2024-10-22 23:44:40 -04:00
|
|
|
defaultHourlyCount := float64(options.baseCount) * float64(options.storageDuration/time.Hour)
|
|
|
|
defaultExpectation := float64(options.baseCount) / float64(time.Hour/options.recentDuration)
|
2024-10-23 08:55:19 -04:00
|
|
|
return &item[K]{
|
2024-10-22 23:44:40 -04:00
|
|
|
eventSeries: options.creator(id),
|
|
|
|
maxSeries: options.slidingWindowCreator(id),
|
|
|
|
options: options,
|
|
|
|
|
|
|
|
defaultExpectation: defaultExpectation,
|
|
|
|
defaultHourlyCount: defaultHourlyCount,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-10-23 08:55:19 -04:00
|
|
|
func (i *item[K]) score() Score[K] {
|
2024-10-22 23:44:40 -04:00
|
|
|
recentCount, count := i.computeCounts()
|
|
|
|
if recentCount < i.options.countThreshold {
|
2024-10-23 08:55:19 -04:00
|
|
|
return Score[K]{}
|
2024-10-22 23:44:40 -04:00
|
|
|
}
|
|
|
|
if recentCount == count {
|
|
|
|
// we see this for the first time so there is no historical data
|
|
|
|
// use a sensible default like average/median over all items
|
|
|
|
count = recentCount + i.defaultHourlyCount
|
|
|
|
}
|
|
|
|
probability := recentCount / count
|
|
|
|
|
|
|
|
// order of those two lines is important.
|
|
|
|
// if we insert before reading we might just get the same value.
|
|
|
|
expectation := i.computeRecentMax()
|
|
|
|
i.maxSeries.Insert(probability)
|
|
|
|
|
|
|
|
if expectation == 0.0 {
|
|
|
|
expectation = i.defaultExpectation
|
|
|
|
}
|
|
|
|
|
|
|
|
klScore := computeKullbackLeibler(probability, expectation)
|
|
|
|
if klScore > i.max {
|
|
|
|
i.updateMax(klScore)
|
|
|
|
}
|
|
|
|
i.decayMax()
|
|
|
|
|
2024-10-23 08:55:19 -04:00
|
|
|
mixedScore := 5 * (klScore + i.max)
|
2024-10-22 23:44:40 -04:00
|
|
|
|
2024-10-23 08:55:19 -04:00
|
|
|
return Score[K]{
|
2024-10-22 23:44:40 -04:00
|
|
|
Score: mixedScore,
|
|
|
|
Probability: probability,
|
|
|
|
Expectation: expectation,
|
|
|
|
Maximum: i.max,
|
|
|
|
KLScore: klScore,
|
2024-10-23 08:55:19 -04:00
|
|
|
Count: count,
|
|
|
|
RecentCount: recentCount,
|
2024-10-22 23:44:40 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-10-23 08:55:19 -04:00
|
|
|
func (i *item[K]) computeCounts() (float64, float64) {
|
|
|
|
now := timeseries.DefaultClock.Now()
|
2024-10-22 23:44:40 -04:00
|
|
|
totalCount, _ := i.eventSeries.Range(now.Add(-i.options.storageDuration), now)
|
|
|
|
count, _ := i.eventSeries.Range(now.Add(-i.options.recentDuration), now)
|
|
|
|
return count, totalCount
|
|
|
|
}
|
|
|
|
|
2024-10-23 08:55:19 -04:00
|
|
|
func (i *item[K]) computeRecentMax() float64 {
|
2024-10-22 23:44:40 -04:00
|
|
|
return i.maxSeries.Max()
|
|
|
|
}
|
|
|
|
|
2024-10-23 08:55:19 -04:00
|
|
|
func (i *item[K]) decayMax() {
|
2024-10-22 23:44:40 -04:00
|
|
|
i.updateMax(i.max * i.computeExponentialDecayMultiplier())
|
|
|
|
}
|
|
|
|
|
2024-10-23 08:55:19 -04:00
|
|
|
func (i *item[K]) updateMax(score float64) {
|
2024-10-22 23:44:40 -04:00
|
|
|
i.max = score
|
2024-10-23 08:55:19 -04:00
|
|
|
i.maxTime = timeseries.DefaultClock.Now()
|
2024-10-22 23:44:40 -04:00
|
|
|
}
|
|
|
|
|
2024-10-23 08:55:19 -04:00
|
|
|
func (i *item[K]) computeExponentialDecayMultiplier() float64 {
|
|
|
|
return math.Pow(0.5, float64(timeseries.DefaultClock.Now().Unix()-i.maxTime.Unix())/i.options.halfLife.Seconds())
|
2024-10-22 23:44:40 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// computeKullbackLeibler returns probability * ln(probability / expectation),
// the single-term Kullback-Leibler contribution of an observed probability
// against an expected one. A probability of exactly zero yields zero, so the
// logarithm is never evaluated at zero.
func computeKullbackLeibler(probability float64, expectation float64) float64 {
	if probability != 0.0 {
		ratio := probability / expectation
		return probability * math.Log(ratio)
	}
	return 0.0
}
|