feat: initial Beta 1 release

- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
This commit is contained in:
Thomas Cravey 2025-08-15 18:06:28 -05:00
commit 2954e85e7a
19 changed files with 1983 additions and 0 deletions

297
internal/config/config.go Normal file
View file

@ -0,0 +1,297 @@
package config
import (
"os"
"strconv"
"strings"
"time"
"unicode"
)
// Config is the application's runtime configuration. FromEnv populates every
// field from an environment variable; the variable name and built-in default
// for each field are noted below.
type Config struct {
// IRC/soju
// IRCServer: SOJU_HOST (default "127.0.0.1").
IRCServer string
// IRCPort: SOJU_PORT (default 6697).
IRCPort int
// IRCTLS: SOJU_TLS (default true).
IRCTLS bool
// Nick: IRC_NICK (default "sojuboy"). Also the default for Username and Keywords.
Nick string
// Username: IRC_USERNAME (defaults to Nick).
Username string
// Realname: IRC_REALNAME (default "sojuboy").
Realname string
// Password: IRC_PASSWORD (default empty). Masked by Redact.
Password string
// Network: SOJU_NETWORK (default empty).
Network string
// Channels: CHANNELS, comma-separated (default none).
Channels []string
// Keywords: KEYWORDS, comma-separated (defaults to Nick).
Keywords []string
// AuthMethod: SOJU_AUTH, lower-cased (default "sasl").
AuthMethod string
// Notifier
// Notifier: NOTIFIER (default "pushover").
Notifier string
// PushoverUserKey: PUSHOVER_USER_KEY. Masked by Redact.
PushoverUserKey string
// PushoverAPIToken: PUSHOVER_API_TOKEN. Masked by Redact.
PushoverAPIToken string
// Summarizer / LLM
// LLMProvider: LLM_PROVIDER (default "openai").
LLMProvider string
// OpenAIAPIKey: OPENAI_API_KEY. Masked by Redact.
OpenAIAPIKey string
// OpenAIBaseURL: OPENAI_BASE_URL (default empty).
OpenAIBaseURL string
// OpenAIModel: OPENAI_MODEL (default "gpt-5").
OpenAIModel string
// OpenAIMaxTokens: OPENAI_MAX_TOKENS (default 700).
OpenAIMaxTokens int
// SummFollowLinks: SUMM_FOLLOW_LINKS (default true).
SummFollowLinks bool
// SummLinkTimeout: SUMM_LINK_TIMEOUT (default 6s).
SummLinkTimeout time.Duration
// SummLinkMaxBytes: SUMM_LINK_MAX_BYTES (default 262144).
SummLinkMaxBytes int
// SummGroupWindow: SUMM_GROUP_WINDOW (default 90s).
SummGroupWindow time.Duration
// SummMaxLinks: SUMM_MAX_LINKS (default 5).
SummMaxLinks int
// Digests
// DigestCron: DIGEST_CRON (default "0 */6 * * *").
DigestCron string
// DigestWindow: DIGEST_WINDOW (default 6h).
DigestWindow time.Duration
// QuietHours: QUIET_HOURS (default empty); the "HH:MM-HH:MM" form is what
// WithinQuietHours parses.
QuietHours string
// NotifyBackfill: NOTIFY_BACKFILL (default false).
NotifyBackfill bool
// MentionMinInterval: MENTION_MIN_INTERVAL (default 30s).
MentionMinInterval time.Duration
// MentionsOnlyChannels: MENTIONS_ONLY_CHANNELS; allow-list consulted by IsChannelAllowed.
MentionsOnlyChannels []string
// MentionsDenyChannels: MENTIONS_DENY_CHANNELS; deny-list consulted by IsChannelAllowed.
MentionsDenyChannels []string
// UrgentKeywords: URGENT_KEYWORDS, comma-separated (default none).
UrgentKeywords []string
// HTTP API
// HTTPListen: HTTP_LISTEN (default ":8080").
HTTPListen string
// HTTPToken: HTTP_TOKEN (default empty). Masked by Redact.
HTTPToken string
// Storage
// StorePath: STORE_PATH (default "/data/app.db").
StorePath string
// RetentionDays: STORE_RETENTION_DAYS (default 7).
RetentionDays int
// Logging
// LogLevel: LOG_LEVEL (default "info").
LogLevel string
}
// FromEnv builds a Config from the process environment. Every setting has a
// built-in default that is used when its variable is unset or empty; how
// malformed typed values are treated is decided by the getEnv* helpers.
func FromEnv() Config {
	// The nick is resolved first because it is also the fallback for
	// IRC_USERNAME and KEYWORDS.
	nick := getEnv("IRC_NICK", "sojuboy")

	return Config{
		// IRC/soju
		IRCServer:  getEnv("SOJU_HOST", "127.0.0.1"),
		IRCPort:    getEnvInt("SOJU_PORT", 6697),
		IRCTLS:     getEnvBool("SOJU_TLS", true),
		Nick:       nick,
		Username:   getEnv("IRC_USERNAME", nick),
		Realname:   getEnv("IRC_REALNAME", "sojuboy"),
		Password:   getEnv("IRC_PASSWORD", ""),
		Network:    getEnv("SOJU_NETWORK", ""),
		Channels:   splitCSV(getEnv("CHANNELS", "")),
		Keywords:   splitCSV(getEnv("KEYWORDS", nick)),
		AuthMethod: strings.ToLower(getEnv("SOJU_AUTH", "sasl")),

		// Notifier
		Notifier:         getEnv("NOTIFIER", "pushover"),
		PushoverUserKey:  getEnv("PUSHOVER_USER_KEY", ""),
		PushoverAPIToken: getEnv("PUSHOVER_API_TOKEN", ""),

		// Summarizer / LLM
		LLMProvider:      getEnv("LLM_PROVIDER", "openai"),
		OpenAIAPIKey:     getEnv("OPENAI_API_KEY", ""),
		OpenAIBaseURL:    getEnv("OPENAI_BASE_URL", ""),
		OpenAIModel:      getEnv("OPENAI_MODEL", "gpt-5"),
		OpenAIMaxTokens:  getEnvInt("OPENAI_MAX_TOKENS", 700),
		SummFollowLinks:  getEnvBool("SUMM_FOLLOW_LINKS", true),
		SummLinkTimeout:  getEnvDuration("SUMM_LINK_TIMEOUT", 6*time.Second),
		SummLinkMaxBytes: getEnvInt("SUMM_LINK_MAX_BYTES", 262144),
		SummGroupWindow:  getEnvDuration("SUMM_GROUP_WINDOW", 90*time.Second),
		SummMaxLinks:     getEnvInt("SUMM_MAX_LINKS", 5),

		// Digests and mention tuning
		DigestCron:           getEnv("DIGEST_CRON", "0 */6 * * *"),
		DigestWindow:         getEnvDuration("DIGEST_WINDOW", 6*time.Hour),
		QuietHours:           getEnv("QUIET_HOURS", ""),
		NotifyBackfill:       getEnvBool("NOTIFY_BACKFILL", false),
		MentionMinInterval:   getEnvDuration("MENTION_MIN_INTERVAL", 30*time.Second),
		MentionsOnlyChannels: splitCSV(getEnv("MENTIONS_ONLY_CHANNELS", "")),
		MentionsDenyChannels: splitCSV(getEnv("MENTIONS_DENY_CHANNELS", "")),
		UrgentKeywords:       splitCSV(getEnv("URGENT_KEYWORDS", "")),

		// HTTP API
		HTTPListen: getEnv("HTTP_LISTEN", ":8080"),
		HTTPToken:  getEnv("HTTP_TOKEN", ""),

		// Storage
		StorePath:     getEnv("STORE_PATH", "/data/app.db"),
		RetentionDays: getEnvInt("STORE_RETENTION_DAYS", 7),

		// Logging
		LogLevel: getEnv("LOG_LEVEL", "info"),
	}
}
// Redact returns a copy of c in which every non-empty secret (IRC password,
// Pushover keys, OpenAI key, HTTP token) is replaced by "***". Empty secrets
// stay empty so it remains visible that they were never set. The receiver is
// not modified; slice fields in the copy still share storage with c.
func (c Config) Redact() Config {
	masked := c
	secrets := []*string{
		&masked.Password,
		&masked.PushoverUserKey,
		&masked.PushoverAPIToken,
		&masked.OpenAIAPIKey,
		&masked.HTTPToken,
	}
	for _, field := range secrets {
		if *field != "" {
			*field = "***"
		}
	}
	return masked
}
// getEnv returns the value of environment variable key, or def when the
// variable is unset or set to the empty string.
func getEnv(key, def string) string {
	value, present := os.LookupEnv(key)
	if !present || value == "" {
		return def
	}
	return value
}
// getEnvInt returns environment variable key parsed as a base-10 integer.
// Surrounding whitespace is ignored, matching getEnvBool. def is returned
// when the variable is unset, empty, or not a valid integer.
func getEnvInt(key string, def int) int {
	raw := strings.TrimSpace(os.Getenv(key))
	if raw == "" {
		return def
	}
	n, err := strconv.Atoi(raw)
	if err != nil {
		return def
	}
	return n
}
// getEnvBool returns environment variable key interpreted as a boolean.
// Matching is case-insensitive and ignores surrounding whitespace:
//
//	true:  1, t, true, y, yes, on
//	false: 0, f, false, n, no, off
//
// def is returned when the variable is unset, blank, or holds any other
// value. Falling back to the default (rather than to false) keeps a typo in a
// variable such as SOJU_TLS from silently switching a default-on feature off,
// and mirrors how getEnvInt treats unparseable input.
func getEnvBool(key string, def bool) bool {
	switch strings.ToLower(strings.TrimSpace(os.Getenv(key))) {
	case "1", "t", "true", "y", "yes", "on":
		return true
	case "0", "f", "false", "n", "no", "off":
		return false
	default:
		return def
	}
}
// getEnvDuration returns environment variable key parsed with
// time.ParseDuration (e.g. "90s", "6h"). Surrounding whitespace is ignored,
// matching getEnvBool. def is returned when the variable is unset, empty, or
// not a valid duration.
func getEnvDuration(key string, def time.Duration) time.Duration {
	raw := strings.TrimSpace(os.Getenv(key))
	if raw == "" {
		return def
	}
	d, err := time.ParseDuration(raw)
	if err != nil {
		return def
	}
	return d
}
// splitCSV splits a comma-separated list into its trimmed, non-empty items.
// A blank input yields nil; an input made only of separators yields an empty,
// non-nil slice.
func splitCSV(s string) []string {
	if len(strings.TrimSpace(s)) == 0 {
		return nil
	}
	fields := strings.Split(s, ",")
	items := make([]string, 0, len(fields))
	for i := range fields {
		if item := strings.TrimSpace(fields[i]); item != "" {
			items = append(items, item)
		}
	}
	return items
}
// ContainsMention reports whether text mentions nick or any of keywords as a
// whole word, ignoring case. An empty nick is skipped, so only the keywords
// are consulted in that case.
func ContainsMention(text, nick string, keywords []string) bool {
	nickHit := nick != "" && containsWordFold(text, nick)
	return nickHit || ContainsAnyWordFold(text, keywords)
}
// ContainsAnyWordFold reports whether at least one non-empty entry of
// keywords occurs in text as a whole word, ignoring case. Empty entries are
// skipped; an empty or nil keywords list yields false.
func ContainsAnyWordFold(text string, keywords []string) bool {
	for i := range keywords {
		candidate := keywords[i]
		if candidate != "" && containsWordFold(text, candidate) {
			return true
		}
	}
	return false
}
// containsWordFold reports whether word occurs in text as a whole word,
// ignoring case. An empty word never matches.
//
// "Word characters" are letters, digits, '_' and '-'. An occurrence counts
// only if it is not glued to further word characters: when word starts
// (or ends) with a word character, the rune immediately before (or after) the
// occurrence must not be one. So "foo" matches in "hi foo!" but not in
// "foobar" or "foo-bar".
//
// Unlike a pure tokenising approach, this also matches words that contain
// other characters themselves, such as the IRC nick "[bot]", the keyword
// "c++", or a multi-word phrase. For words made only of word characters the
// result is the same as comparing against each maximal run of word
// characters in text.
func containsWordFold(text, word string) bool {
	if word == "" {
		return false
	}
	isNickChar := func(r rune) bool {
		return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' || r == '-'
	}

	// Work on runes of the lower-cased strings so that indexes stay aligned
	// even when lower-casing changes a rune's encoded length.
	hay := []rune(strings.ToLower(text))
	needle := []rune(strings.ToLower(word))

	// A boundary check is only meaningful on a side where the word itself
	// begins or ends with a word character.
	guardLeft := isNickChar(needle[0])
	guardRight := isNickChar(needle[len(needle)-1])

	matchesAt := func(start int) bool {
		for i, r := range needle {
			if hay[start+i] != r {
				return false
			}
		}
		return true
	}

	for start := 0; start+len(needle) <= len(hay); start++ {
		end := start + len(needle)
		if guardLeft && start > 0 && isNickChar(hay[start-1]) {
			continue
		}
		if guardRight && end < len(hay) && isNickChar(hay[end]) {
			continue
		}
		if matchesAt(start) {
			return true
		}
	}
	return false
}
// WithinQuietHours reports whether now falls inside a quiet-hours window
// written as "HH:MM-HH:MM" (24-hour clock, evaluated in now's location).
//
// The window is half-open: the start minute is inside, the end minute is not.
// If the end is not later than the start, the window wraps past midnight
// ("22:00-07:00" covers 22:00 through 06:59); identical start and end
// therefore cover the whole day. "24:00" is accepted as end-of-day.
//
// An empty or malformed window, including out-of-range fields such as
// "25:00" or "22:75", yields false.
func WithinQuietHours(now time.Time, window string) bool {
	startStr, endStr, ok := strings.Cut(strings.TrimSpace(window), "-")
	if !ok || strings.Contains(endStr, "-") {
		return false
	}

	// minuteOfDay converts "HH:MM" into minutes since midnight.
	minuteOfDay := func(s string) (int, bool) {
		hStr, mStr, ok := strings.Cut(strings.TrimSpace(s), ":")
		if !ok || strings.Contains(mStr, ":") {
			return 0, false
		}
		h, errH := strconv.Atoi(hStr)
		m, errM := strconv.Atoi(mStr)
		if errH != nil || errM != nil {
			return 0, false
		}
		inRange := h >= 0 && h <= 23 && m >= 0 && m <= 59
		if !inRange && !(h == 24 && m == 0) {
			return 0, false
		}
		return h*60 + m, true
	}

	start, okStart := minuteOfDay(startStr)
	end, okEnd := minuteOfDay(endStr)
	if !okStart || !okEnd {
		return false
	}

	// Comparing minutes of the day (rather than building absolute times)
	// makes the start instant inclusive and avoids time.Date silently
	// normalising bad fields.
	cur := now.Hour()*60 + now.Minute()
	if end <= start {
		// Window wraps past midnight.
		return cur >= start || cur < end
	}
	return cur >= start && cur < end
}
// GetEnvInt is the exported form of getEnvInt: it returns environment
// variable key as an integer, or def when getEnvInt falls back to its default.
func GetEnvInt(key string, def int) int {
	value := getEnvInt(key, def)
	return value
}
// IsChannelAllowed reports whether mention notifications are permitted for
// channel.
//
// Names are compared case-insensitively after trimming surrounding
// whitespace, on both the argument and the list entries. A blank channel is
// never allowed. When MentionsOnlyChannels is non-empty the channel must be
// on it; independently, a channel on MentionsDenyChannels is always rejected,
// so the deny-list wins when a channel is on both.
func (c Config) IsChannelAllowed(channel string) bool {
	// Normalise once and compare against the normalised value; previously the
	// trimmed form was computed but the raw argument was compared.
	ch := strings.TrimSpace(channel)
	if ch == "" {
		return false
	}

	listed := func(list []string) bool {
		for _, entry := range list {
			if strings.EqualFold(strings.TrimSpace(entry), ch) {
				return true
			}
		}
		return false
	}

	if len(c.MentionsOnlyChannels) > 0 && !listed(c.MentionsOnlyChannels) {
		return false
	}
	return !listed(c.MentionsDenyChannels)
}

208
internal/httpapi/server.go Normal file
View file

@ -0,0 +1,208 @@
package httpapi
import (
	"context"
	"crypto/subtle"
	"fmt"
	"log/slog"
	"net/http"
	"strconv"
	"strings"
	"sync/atomic"
	"time"

	"sojuboy/internal/store"
	"sojuboy/internal/summarizer"
)
// Metrics holds the process counters that handleMetrics renders as plain-text
// "sojuboy_*" lines. handleMetrics reads every field with atomic.LoadInt64,
// so writers should use the sync/atomic functions as well.
type Metrics struct {
// MessagesIngested is exported as sojuboy_messages_ingested_total.
MessagesIngested int64 // counter
// NotificationsSent is exported as sojuboy_notifications_sent_total; the
// /trigger handler increments it.
NotificationsSent int64 // counter
// MessagesPruned is exported as sojuboy_messages_pruned_total.
MessagesPruned int64 // counter
// ConnectedGauge is exported as sojuboy_connected.
ConnectedGauge int64 // 0/1
}
// Server is the HTTP API. Start registers /healthz, /ready, /trigger, /tail
// and /metrics on a fresh mux and serves them on ListenAddr.
type Server struct {
// ListenAddr is the address passed to http.Server.Addr.
ListenAddr string
// AuthToken, when non-empty, must be presented to /trigger and /tail (see
// checkAuth). When empty those endpoints are served without authentication.
AuthToken string
// Store backs /trigger and /tail; both handlers dereference it without a
// nil check.
Store *store.Store
// Summarizer produces the digest for /trigger; a nil value makes /trigger
// answer 503.
Summarizer summarizer.Summarizer
// Notifier, when non-nil, receives the digest produced by /trigger.
Notifier interface {
Notify(context.Context, string, string) error
}
// Logger is optional; every use is guarded by a nil check.
Logger *slog.Logger
// Metrics is optional; when nil, /metrics reports zeros.
Metrics *Metrics
// Ready is optional; when set and returning false, /ready answers 503.
Ready func() bool
}
// Start serves the HTTP API on s.ListenAddr and blocks until the server
// stops. It always returns a non-nil error: whatever ListenAndServe reports,
// which is http.ErrServerClosed after a graceful shutdown.
//
// Cancelling ctx triggers a graceful shutdown that is given up to
// shutdownGrace to drain in-flight requests.
//
// Routes: /healthz (always 200), /ready (503 while s.Ready reports false),
// /trigger, /tail and /metrics.
func (s *Server) Start(ctx context.Context) error {
	const (
		// readHeaderTimeout bounds how long a client may take to send request
		// headers, so idle or slow connections cannot be held open forever.
		// No write timeout is set because /trigger may legitimately take as
		// long as its summarisation timeout.
		readHeaderTimeout = 10 * time.Second
		shutdownGrace     = 10 * time.Second
	)

	mux := http.NewServeMux()
	mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte("ok"))
	})
	mux.HandleFunc("/ready", func(w http.ResponseWriter, r *http.Request) {
		if s.Ready != nil && !s.Ready() {
			w.WriteHeader(http.StatusServiceUnavailable)
			_, _ = w.Write([]byte("not ready"))
			return
		}
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte("ready"))
	})
	mux.HandleFunc("/trigger", s.handleTrigger)
	mux.HandleFunc("/tail", s.handleTail)
	mux.HandleFunc("/metrics", s.handleMetrics)

	srv := &http.Server{
		Addr:              s.ListenAddr,
		Handler:           mux,
		ReadHeaderTimeout: readHeaderTimeout,
	}

	// The watcher exits either on cancellation (after initiating shutdown) or
	// when Start returns for another reason, e.g. the listen address is
	// already in use, so it never outlives the server.
	stopped := make(chan struct{})
	defer close(stopped)
	go func() {
		select {
		case <-ctx.Done():
			shutdownCtx, cancel := context.WithTimeout(context.Background(), shutdownGrace)
			defer cancel()
			// Best effort: the process is stopping and there is no caller
			// left to report a shutdown error to.
			_ = srv.Shutdown(shutdownCtx)
		case <-stopped:
		}
	}()

	if s.Logger != nil {
		s.Logger.Info("http listening", "addr", s.ListenAddr)
	}
	return srv.ListenAndServe()
}
// handleTrigger serves /trigger: it summarises the recent history of one
// channel, pushes the summary through s.Notifier (if configured) and returns
// it as plain text.
//
// Query parameters:
//   - channel (required): the channel to summarise.
//   - window (optional, default "6h"): a positive time.ParseDuration value
//     saying how far back to look.
//
// Responses: 401 when auth is enabled and fails, 400 for a missing channel or
// an invalid/non-positive window, 503 when no summarizer is configured, 500
// on a store error, 502 on a summarizer error, otherwise 200 with the summary.
func (s *Server) handleTrigger(w http.ResponseWriter, r *http.Request) {
	if s.AuthToken != "" && !checkAuth(r, s.AuthToken) {
		w.WriteHeader(http.StatusUnauthorized)
		_, _ = w.Write([]byte("unauthorized"))
		return
	}

	query := r.URL.Query()
	channel := query.Get("channel")
	if channel == "" {
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write([]byte("missing channel"))
		return
	}
	windowStr := query.Get("window")
	if windowStr == "" {
		windowStr = "6h"
	}
	window, err := time.ParseDuration(windowStr)
	if err != nil || window <= 0 {
		// A zero or negative window would select nothing (or the future).
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write([]byte("bad window"))
		return
	}

	// Checked before touching the store so an unconfigured summarizer does
	// not cost a database query.
	if s.Summarizer == nil {
		w.WriteHeader(http.StatusServiceUnavailable)
		_, _ = w.Write([]byte("summarizer not configured"))
		return
	}

	ctx := r.Context()
	msgs, err := s.Store.ListMessagesSince(ctx, channel, time.Now().Add(-window))
	if err != nil {
		if s.Logger != nil {
			s.Logger.Error("http trigger store", "err", err)
		}
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte("store error"))
		return
	}

	// Timeout summarization to avoid hung requests.
	ctxSum, cancel := context.WithTimeout(ctx, 60*time.Second)
	defer cancel()
	summary, err := s.Summarizer.Summarize(ctxSum, channel, msgs, window)
	if err != nil {
		if s.Logger != nil {
			s.Logger.Error("http trigger summarizer", "err", err)
		}
		w.WriteHeader(http.StatusBadGateway)
		_, _ = w.Write([]byte("summarizer error"))
		return
	}

	// Only push a notification when there is something to say, and only
	// count it once it was actually delivered. A delivery failure is logged
	// but does not fail the request: the caller still gets the summary.
	if s.Notifier != nil && summary != "" {
		title := fmt.Sprintf("IRC digest %s (%s)", channel, window)
		if err := s.Notifier.Notify(ctx, title, summary); err != nil {
			if s.Logger != nil {
				s.Logger.Error("http trigger notifier", "err", err)
			}
		} else if s.Metrics != nil {
			atomic.AddInt64(&s.Metrics.NotificationsSent, 1)
		}
	}

	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	_, _ = w.Write([]byte(summary))
}
// handleTail serves /tail: it writes the most recent messages of one channel
// as plain text, oldest first, one "<RFC3339 UTC time> <author> <channel>
// <body>" line per message.
//
// Query parameters: channel (required) and limit (optional, default 50).
// Responses: 401 when auth is enabled and fails, 400 for a missing channel,
// 500 on a store error, otherwise 200.
func (s *Server) handleTail(w http.ResponseWriter, r *http.Request) {
	authRequired := s.AuthToken != ""
	if authRequired && !checkAuth(r, s.AuthToken) {
		w.WriteHeader(http.StatusUnauthorized)
		_, _ = w.Write([]byte("unauthorized"))
		return
	}

	channel := r.URL.Query().Get("channel")
	if channel == "" {
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write([]byte("missing channel"))
		return
	}

	recent, err := s.Store.ListRecentMessages(r.Context(), channel, getIntQuery(r, "limit", 50))
	if err != nil {
		if s.Logger != nil {
			s.Logger.Error("http tail store", "err", err)
		}
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte("store error"))
		return
	}

	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	// The store returns newest first; walk backwards to print oldest first.
	for idx := len(recent); idx > 0; idx-- {
		entry := recent[idx-1]
		stamp := entry.Time.UTC().Format(time.RFC3339)
		_, _ = fmt.Fprintf(w, "%s %s %s %s\n", stamp, entry.Author, channel, entry.Body)
	}
}
// handleMetrics serves /metrics: it writes the four sojuboy_* values as
// plain-text "name value" lines. Each value is read atomically from
// s.Metrics; when s.Metrics is nil every value is reported as 0.
func (s *Server) handleMetrics(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "text/plain; version=0.0.4")

	var snapshot Metrics // zero values are what we report without s.Metrics
	if m := s.Metrics; m != nil {
		snapshot.MessagesIngested = atomic.LoadInt64(&m.MessagesIngested)
		snapshot.NotificationsSent = atomic.LoadInt64(&m.NotificationsSent)
		snapshot.MessagesPruned = atomic.LoadInt64(&m.MessagesPruned)
		snapshot.ConnectedGauge = atomic.LoadInt64(&m.ConnectedGauge)
	}

	_, _ = fmt.Fprintf(w, "sojuboy_messages_ingested_total %d\n", snapshot.MessagesIngested)
	_, _ = fmt.Fprintf(w, "sojuboy_notifications_sent_total %d\n", snapshot.NotificationsSent)
	_, _ = fmt.Fprintf(w, "sojuboy_messages_pruned_total %d\n", snapshot.MessagesPruned)
	_, _ = fmt.Fprintf(w, "sojuboy_connected %d\n", snapshot.ConnectedGauge)
}
// checkAuth reports whether request r carries the expected token. The token
// is accepted from any one of, in order:
//
//  1. "Authorization: Bearer <token>"
//  2. the "token" query parameter
//  3. HTTP Basic auth with user "token" and the token as password
//  4. the "X-Auth-Token" header
//
// Candidates are compared in constant time so response timing does not
// reveal how much of a guess was correct.
//
// The handlers in this file only call checkAuth when a token is configured.
// With an empty token a request that supplies no credentials compares equal
// and is accepted, exactly as before; callers must keep guarding that case.
func checkAuth(r *http.Request, token string) bool {
	want := []byte(token)
	matches := func(candidate string) bool {
		return subtle.ConstantTimeCompare([]byte(candidate), want) == 1
	}

	if bearer, ok := strings.CutPrefix(r.Header.Get("Authorization"), "Bearer "); ok && matches(bearer) {
		return true
	}
	if matches(r.URL.Query().Get("token")) {
		return true
	}
	if user, pass, ok := r.BasicAuth(); ok && user == "token" && matches(pass) {
		return true
	}
	return matches(r.Header.Get("X-Auth-Token"))
}
// getIntQuery returns query parameter key of r parsed as a base-10 integer,
// or def when the parameter is absent, empty, or not a valid integer.
func getIntQuery(r *http.Request, key string, def int) int {
	raw := r.URL.Query().Get(key)
	if raw == "" {
		return def
	}
	parsed, err := strconv.Atoi(raw)
	if err != nil {
		return def
	}
	return parsed
}

View file

@ -0,0 +1,7 @@
package ircclient
import "encoding/base64"
// base64Encode returns b encoded with the standard, padded base64 alphabet.
func base64Encode(b []byte) string {
	codec := base64.StdEncoding
	encoded := make([]byte, codec.EncodedLen(len(b)))
	codec.Encode(encoded, b)
	return string(encoded)
}

View file

@ -0,0 +1,24 @@
package logging
import (
"log/slog"
"os"
"strings"
)
// New builds a slog.Logger that writes JSON records to standard output at
// the requested minimum level. The level name is matched case-insensitively
// after trimming whitespace: "debug", "info", "warn"/"warning" and
// "err"/"error" are recognised; anything else, including the empty string,
// selects info.
func New(level string) *slog.Logger {
	known := map[string]slog.Level{
		"debug":   slog.LevelDebug,
		"info":    slog.LevelInfo,
		"":        slog.LevelInfo,
		"warn":    slog.LevelWarn,
		"warning": slog.LevelWarn,
		"err":     slog.LevelError,
		"error":   slog.LevelError,
	}
	minimum, recognised := known[strings.ToLower(strings.TrimSpace(level))]
	if !recognised {
		minimum = slog.LevelInfo
	}
	opts := &slog.HandlerOptions{Level: minimum}
	return slog.New(slog.NewJSONHandler(os.Stdout, opts))
}

View file

@ -0,0 +1,8 @@
package notifier
import "context"
// Notifier delivers a titled text message to the user through some push
// channel. PushoverNotifier in this package is one implementation.
type Notifier interface {
// Notify sends message under the given title and reports a delivery error,
// if any.
Notify(ctx context.Context, title, message string) error
}

View file

@ -0,0 +1,39 @@
package notifier
import (
"context"
"strings"
"github.com/gregdel/pushover"
)
// PushoverNotifier sends notifications through the Pushover client library.
// Construct it with NewPushover.
type PushoverNotifier struct {
// app is the Pushover client created from the application API token.
app *pushover.Pushover
// userKey identifies the recipient; Notify is a no-op while it is empty.
userKey string
}
// NewPushover returns a PushoverNotifier that sends to the recipient
// identified by userKey using a client created from apiToken. Neither value
// is validated here.
func NewPushover(userKey, apiToken string) *PushoverNotifier {
	n := new(PushoverNotifier)
	n.userKey = userKey
	n.app = pushover.New(apiToken)
	return n
}
// Notify sends message with the given title to the configured Pushover
// recipient and returns the client's error, if any.
//
// It is a silent no-op (nil error) when the notifier is nil or has no client
// or user key. The title is trimmed of surrounding whitespace. Title and
// message are shortened to maxTitleChars and maxMessageChars characters
// respectively; the cut is made on character boundaries so a multi-byte
// UTF-8 character is never split.
//
// ctx is accepted to satisfy the Notifier interface but is not used by this
// method.
func (p *PushoverNotifier) Notify(ctx context.Context, title, message string) error {
	if p == nil || p.app == nil || p.userKey == "" {
		return nil
	}

	// NOTE(review): limits taken from Pushover's documented message and
	// title lengths — confirm against the current API documentation.
	const (
		maxMessageChars = 1024
		maxTitleChars   = 250
	)
	truncate := func(s string, limit int) string {
		// Fewer bytes than the limit implies fewer characters; skip the
		// conversion in the common case.
		if len(s) <= limit {
			return s
		}
		runes := []rune(s)
		if len(runes) <= limit {
			return s
		}
		return string(runes[:limit])
	}

	msg := &pushover.Message{
		Title:   truncate(strings.TrimSpace(title), maxTitleChars),
		Message: truncate(message, maxMessageChars),
	}
	recipient := pushover.NewRecipient(p.userKey)
	_, err := p.app.SendMessage(msg, recipient)
	return err
}

View file

@ -0,0 +1,31 @@
package scheduler
import (
"context"
"log"
"time"
"github.com/robfig/cron/v3"
)
// Start schedules job according to the cron expression spec and returns
// immediately; the scheduler keeps running in the background until ctx is
// done. The expression may carry an optional leading seconds field and may
// use descriptors (the parser is built with SecondOptional and Descriptor).
// job receives the wall-clock time at which it was invoked.
//
// An invalid spec is returned as an error and nothing is started. logger is
// optional.
func Start(ctx context.Context, spec string, job func(now time.Time), logger *log.Logger) error {
	fields := cron.SecondOptional | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor
	runner := cron.New(cron.WithParser(cron.NewParser(fields)))

	invoke := func() { job(time.Now()) }
	if _, err := runner.AddFunc(spec, invoke); err != nil {
		return err
	}

	if logger != nil {
		logger.Printf("scheduler started: %s", spec)
	}
	runner.Start()

	// Stop the scheduler once the caller's context ends.
	go func() {
		<-ctx.Done()
		runner.Stop()
	}()
	return nil
}

309
internal/soju/rawclient.go Normal file
View file

@ -0,0 +1,309 @@
package soju
import (
"bufio"
"context"
"crypto/tls"
"fmt"
"log/slog"
"net"
"strconv"
"strings"
"sync/atomic"
"time"
"sojuboy/internal/store"
irc "github.com/sorcix/irc"
)
// RawClient is a minimal IRC client for a soju bouncer. Run connects,
// registers with PASS/NICK/USER, joins Channels and reports every PRIVMSG
// through OnPrivmsg, reconnecting after failures.
type RawClient struct {
// Server and Port form the address to dial.
Server string
Port int
// UseTLS selects a TLS connection (server name = Server, TLS 1.2 minimum).
UseTLS bool
// Nick is sent with NICK and is also used to recognise our own JOINs.
Nick string
Username string // full identity: username/network@client
Realname string
Password string // PASS <password>
// Channels are joined after the 001 welcome.
Channels []string
// Number of messages to fetch via CHATHISTORY LATEST per channel after join.
BackfillLatest int
// OnPrivmsg, if set, is called for every PRIVMSG with the target, the
// sender's nick, the text, the message id tag (may be empty) and the
// message time (the server-time tag when present, otherwise time.Now()).
OnPrivmsg func(channel, author, text, msgid string, at time.Time)
// Logger is optional; every use is guarded by a nil check.
Logger *slog.Logger
// Debug additionally logs each raw line sent and received (PASS is masked).
Debug bool
// Store is used to compute last-seen timestamp for CHATHISTORY.
Store *store.Store
// Readiness/metrics hooks
ConnectedGauge *int64 // 0/1
IsReady *int32 // 0/1 atomic flag
}
// setConnected publishes the connection state to the optional hooks:
// ConnectedGauge and IsReady are each atomically set to 1 when v is true and
// to 0 otherwise. A nil hook is skipped.
func (c *RawClient) setConnected(v bool) {
	var state int64
	if v {
		state = 1
	}
	if gauge := c.ConnectedGauge; gauge != nil {
		atomic.StoreInt64(gauge, state)
	}
	if ready := c.IsReady; ready != nil {
		atomic.StoreInt32(ready, int32(state))
	}
}
// Run keeps a session to the server alive until ctx is cancelled.
//
// Each session is handled by runOnce. When a session ends with an error, Run
// marks the client as disconnected, logs the error and reconnects after a
// delay that doubles from initialBackoff up to maxBackoff. The delay starts
// over once a session has lasted longer than maxBackoff, so an occasional
// drop after hours of uptime is retried quickly. Waiting is interrupted
// immediately by cancellation.
//
// Run returns ctx.Err() once ctx is done, or nil if runOnce ever returns nil.
func (c *RawClient) Run(ctx context.Context) error {
	const (
		initialBackoff = time.Second
		maxBackoff     = 30 * time.Second
	)

	backoff := initialBackoff
	for {
		started := time.Now()
		err := c.runOnce(ctx)

		// Whatever ended the session, we are no longer connected. Without
		// this a dropped connection would leave readiness reporting "up".
		c.setConnected(false)

		if err == nil {
			return nil
		}
		if ctx.Err() != nil {
			return ctx.Err()
		}
		if c.Logger != nil {
			c.Logger.Error("raw soju client stopped", "err", err)
		}

		if time.Since(started) > maxBackoff {
			backoff = initialBackoff
		}

		// Cancellable sleep.
		timer := time.NewTimer(backoff)
		select {
		case <-ctx.Done():
			timer.Stop()
			return ctx.Err()
		case <-timer.C:
		}

		if backoff *= 2; backoff > maxBackoff {
			backoff = maxBackoff
		}
	}
}
// runOnce runs a single session: dial, request capabilities, register with
// PASS/NICK/USER, then process server lines until the connection ends. It
// only returns with a non-nil error: the dial/read/write error, an error
// describing a server ERROR line, or ctx.Err() when ctx was cancelled.
//
// Handled commands:
//   - CAP ACK/NEW: tracks whether draft/event-playback is enabled.
//   - PING: answered with PONG.
//   - 001: marks the client connected and joins c.Channels.
//   - JOIN (our own): if event playback is off and c.BackfillLatest > 0,
//     requests CHATHISTORY LATEST since the last stored message (or the last
//     24h when nothing is stored).
//   - PRIVMSG: forwarded to c.OnPrivmsg.
//   - ERROR: ends the session.
//
// The IRC parser keeps a message's trailing parameter in msg.Trailing rather
// than in msg.Params, so every handler works on the combined list. This is
// what lets "PING :token" and "CAP * ACK :a b c" be recognised.
func (c *RawClient) runOnce(ctx context.Context) error {
	address := net.JoinHostPort(c.Server, strconv.Itoa(c.Port))

	// Dial with the context so cancellation and a stuck connect are bounded.
	dialer := &net.Dialer{Timeout: 30 * time.Second}
	var (
		conn net.Conn
		err  error
	)
	if c.UseTLS {
		tlsDialer := &tls.Dialer{
			NetDialer: dialer,
			Config:    &tls.Config{ServerName: c.Server, MinVersion: tls.VersionTLS12},
		}
		conn, err = tlsDialer.DialContext(ctx, "tcp", address)
	} else {
		conn, err = dialer.DialContext(ctx, "tcp", address)
	}
	if err != nil {
		return err
	}
	defer conn.Close()
	defer c.setConnected(false)

	// The read loop blocks in ReadString; closing the connection is what
	// unblocks it when ctx is cancelled.
	sessionDone := make(chan struct{})
	defer close(sessionDone)
	go func() {
		select {
		case <-ctx.Done():
			_ = conn.Close()
		case <-sessionDone:
		}
	}()

	rw := bufio.NewReadWriter(bufio.NewReader(conn), bufio.NewWriter(conn))
	write := func(line string) error {
		out := line
		if strings.HasPrefix(strings.ToUpper(line), "PASS ") {
			out = "PASS ********" // never log the password
		}
		if c.Debug && c.Logger != nil {
			c.Logger.Debug("irc>", "line", out)
		}
		if _, err := rw.WriteString(line + "\r\n"); err != nil {
			return err
		}
		return rw.Flush()
	}

	// Request capabilities needed for chathistory and accurate timestamps.
	// Write errors here are deliberately ignored: a broken connection is
	// reported by the registration writes just below.
	_ = write("CAP LS 302")
	_ = write("CAP REQ :server-time batch message-tags draft/chathistory draft/event-playback echo-message cap-notify")
	_ = write("CAP END")

	// Authenticate with PASS/NICK/USER
	if c.Password != "" {
		if err := write("PASS " + c.Password); err != nil {
			return err
		}
	}
	if err := write("NICK " + c.Nick); err != nil {
		return err
	}
	user := c.Username
	if user == "" {
		user = c.Nick
	}
	if err := write(fmt.Sprintf("USER %s %s %s :%s", user, user, c.Server, c.Realname)); err != nil {
		return err
	}

	// Reader loop
	eventPlayback := false
	selfJoined := map[string]bool{}
	for {
		if err := ctx.Err(); err != nil {
			return err
		}
		rawLine, err := rw.ReadString('\n')
		if err != nil {
			if ctxErr := ctx.Err(); ctxErr != nil {
				return ctxErr
			}
			return err
		}
		rawLine = strings.TrimRight(rawLine, "\r\n")
		if rawLine == "" {
			continue
		}
		if c.Debug && c.Logger != nil {
			c.Logger.Debug("irc<", "line", rawLine)
		}

		// Parse IRCv3 tags if present
		var tags map[string]string
		line := rawLine
		if strings.HasPrefix(line, "@") {
			if sp := strings.IndexByte(line, ' '); sp > 0 {
				tags = parseTags(line[1:sp])
				line = strings.TrimSpace(line[sp+1:])
			}
		}
		msg := irc.ParseMessage(line)
		if msg == nil {
			continue
		}

		// Middle parameters followed by the trailing one, if any. The
		// three-index slice forces a copy so msg.Params is never modified.
		params := msg.Params
		if msg.Trailing != "" {
			params = append(params[:len(params):len(params)], msg.Trailing)
		}

		switch strings.ToUpper(msg.Command) {
		case "CAP":
			// Examples: :bnc CAP * ACK :server-time batch message-tags draft/chathistory draft/event-playback
			if len(params) < 3 {
				break
			}
			caps := strings.TrimPrefix(params[2], ":")
			switch strings.ToUpper(params[1]) {
			case "ACK":
				if strings.Contains(caps, "draft/event-playback") {
					eventPlayback = true
					if c.Logger != nil {
						c.Logger.Info("cap enabled", "cap", "draft/event-playback")
					}
				}
			case "NEW":
				if strings.Contains(caps, "draft/event-playback") && !eventPlayback {
					_ = write("CAP REQ :draft/event-playback")
				}
			}

		case "PING":
			if len(params) > 0 {
				_ = write("PONG :" + params[len(params)-1])
			}

		case "001": // welcome
			c.setConnected(true)
			if c.Logger != nil {
				c.Logger.Info("connected", "server", c.Server, "auth", "raw")
			}
			for _, ch := range c.Channels {
				_ = write("JOIN " + ch)
				if c.Logger != nil {
					c.Logger.Info("join requested", "channel", ch)
				}
			}

		case "JOIN":
			if len(params) == 0 {
				break
			}
			ch := params[0]
			nick := nickFromPrefix(msg.Prefix)
			if c.Logger != nil {
				c.Logger.Info("joined", "channel", ch, "nick", nick)
			}
			if !strings.EqualFold(nick, c.Nick) || selfJoined[ch] {
				break
			}
			selfJoined[ch] = true
			if eventPlayback || c.BackfillLatest <= 0 {
				break
			}
			// Use last seen timestamp if available
			since := time.Now().Add(-24 * time.Hour) // default fallback
			if c.Store != nil {
				if t, ok, err := c.Store.LastMessageTime(ctx, ch); err == nil && ok {
					since = t
				}
			}
			// Same layout as the server-time tag: UTC with millisecond
			// precision.
			ts := since.UTC().Format("2006-01-02T15:04:05.000Z")
			_ = write(fmt.Sprintf("CHATHISTORY LATEST %s timestamp=%s %d", ch, ts, c.BackfillLatest))

		case "PRIVMSG":
			if len(params) < 2 {
				continue
			}
			target, text := params[0], params[1]
			at := time.Now()
			if ts := tags["time"]; ts != "" {
				if t, e := time.Parse(time.RFC3339Nano, ts); e == nil {
					at = t
				} else if t2, e2 := time.Parse(time.RFC3339, ts); e2 == nil {
					at = t2
				}
			}
			msgid := tags["soju-msgid"]
			if msgid == "" {
				msgid = tags["msgid"]
			}
			if c.OnPrivmsg != nil {
				c.OnPrivmsg(target, nickFromPrefix(msg.Prefix), text, msgid, at)
			}

		case "ERROR":
			c.setConnected(false)
			return fmt.Errorf("server closed: %s", strings.Join(params, " "))
		}
	}
}
// nickFromPrefix picks the sender name out of a message prefix: the first
// non-empty value among Name, User and Host, or "" when pfx is nil or all
// three are empty.
func nickFromPrefix(pfx *irc.Prefix) string {
	switch {
	case pfx == nil:
		return ""
	case pfx.Name != "":
		return pfx.Name
	case pfx.User != "":
		return pfx.User
	default:
		// Host is the last candidate; it is "" when nothing is set.
		return pfx.Host
	}
}
// parseTags parses the tag section of an IRCv3 message (the text between the
// leading '@' and the first space) into a key → value map.
//
// Tags are separated by ';'. A tag without '=' gets the empty value; only the
// first '=' splits key from value. Values are unescaped as described by
// unescapeTagValue. The result is never nil; empty input yields an empty map.
func parseTags(s string) map[string]string {
	out := make(map[string]string)
	for _, item := range strings.Split(s, ";") {
		if item == "" {
			continue
		}
		key, raw, _ := strings.Cut(item, "=")
		out[key] = unescapeTagValue(raw)
	}
	return out
}

// unescapeTagValue decodes the escape sequences of the IRCv3 message-tags
// specification: `\:` → ';', `\s` → space, `\\` → '\', `\r` → CR, `\n` → LF.
// A backslash before any other character is dropped and the character kept;
// a lone trailing backslash is dropped.
func unescapeTagValue(v string) string {
	if !strings.Contains(v, `\`) {
		return v // common case: timestamps and ids carry no escapes
	}
	var b strings.Builder
	b.Grow(len(v))
	// Escapes are pure ASCII, so walking bytes is safe for UTF-8 input and
	// leaves every other byte untouched.
	for i := 0; i < len(v); i++ {
		ch := v[i]
		if ch != '\\' {
			b.WriteByte(ch)
			continue
		}
		i++
		if i == len(v) {
			break // trailing lone backslash
		}
		switch v[i] {
		case ':':
			b.WriteByte(';')
		case 's':
			b.WriteByte(' ')
		case 'r':
			b.WriteByte('\r')
		case 'n':
			b.WriteByte('\n')
		default: // includes `\\`
			b.WriteByte(v[i])
		}
	}
	return b.String()
}

153
internal/store/store.go Normal file
View file

@ -0,0 +1,153 @@
package store
import (
"context"
"database/sql"
"errors"
"time"
_ "modernc.org/sqlite"
)
// Store is the message store. It wraps the database handle created by Open
// and must be released with Close.
type Store struct {
// db is the underlying handle; Open limits it to one open connection.
db *sql.DB
}
// Message is one stored chat message, mirroring a row of the messages table.
type Message struct {
// ID is the autoincrement primary key; InsertMessage does not write it.
ID int64
// Channel is the target the message was sent to.
Channel string
// Author is the sender.
Author string
// Body is the message text.
Body string
// Time is the message timestamp (column "at"); InsertMessage stores it in UTC.
Time time.Time
// MsgID is the optional message id used for de-duplication. The empty
// string is stored as NULL and read back as the empty string.
MsgID string
}
// Open opens the SQLite database at path, prepares it and returns a Store.
//
// The handle is limited to a single open connection. WAL journaling and
// foreign keys are switched on and the schema is created if missing; a
// failure in either step closes the handle and is returned. Afterwards a
// best-effort migration adds the msgid column and its partial unique index;
// errors from those two statements are deliberately ignored.
func Open(ctx context.Context, path string) (*Store, error) {
	db, err := sql.Open("sqlite", path)
	if err != nil {
		return nil, err
	}
	db.SetMaxOpenConns(1)

	prepare := func() error {
		if _, err := db.ExecContext(ctx, `PRAGMA journal_mode = WAL; PRAGMA foreign_keys = ON;`); err != nil {
			return err
		}
		return initSchema(ctx, db)
	}
	if err := prepare(); err != nil {
		_ = db.Close()
		return nil, err
	}

	// Best-effort migration: add msgid column and unique index if missing
	for _, stmt := range []string{
		`ALTER TABLE messages ADD COLUMN msgid TEXT`,
		`CREATE UNIQUE INDEX IF NOT EXISTS idx_messages_msgid ON messages(msgid) WHERE msgid IS NOT NULL`,
	} {
		_, _ = db.ExecContext(ctx, stmt)
	}
	return &Store{db: db}, nil
}
// Close closes the underlying database handle and returns its error.
func (s *Store) Close() error {
	return s.db.Close()
}
// initSchema creates the messages table and its (channel, at) index when
// they do not exist yet. It is safe to run against an already initialised
// database because both statements use IF NOT EXISTS.
func initSchema(ctx context.Context, db *sql.DB) error {
	const schema = `
CREATE TABLE IF NOT EXISTS messages (
id INTEGER PRIMARY KEY AUTOINCREMENT,
channel TEXT NOT NULL,
author TEXT NOT NULL,
body TEXT NOT NULL,
at TIMESTAMP NOT NULL,
msgid TEXT
);
CREATE INDEX IF NOT EXISTS idx_messages_channel_at ON messages(channel, at);
`
	if _, err := db.ExecContext(ctx, schema); err != nil {
		return err
	}
	return nil
}
// InsertMessage stores m. The timestamp is written in UTC and an empty MsgID
// is written as NULL. The statement is INSERT OR IGNORE, so a row that
// violates a uniqueness constraint is skipped without an error.
func (s *Store) InsertMessage(ctx context.Context, m Message) error {
	const stmt = "INSERT OR IGNORE INTO messages(channel, author, body, at, msgid) VALUES(?,?,?,?,?)"
	args := []any{m.Channel, m.Author, m.Body, m.Time.UTC(), nullIfEmpty(m.MsgID)}
	if _, err := s.db.ExecContext(ctx, stmt, args...); err != nil {
		return err
	}
	return nil
}
// nullIfEmpty maps the empty string to nil, which is bound as SQL NULL, and
// returns any other string unchanged.
func nullIfEmpty(s string) any {
	if len(s) > 0 {
		return s
	}
	return nil
}
// ListMessagesSince returns the messages of channel (matched
// case-insensitively) whose timestamp is at or after since, oldest first.
// The result is nil when nothing matches. If iteration fails part-way, the
// rows read so far are returned together with the error.
func (s *Store) ListMessagesSince(ctx context.Context, channel string, since time.Time) ([]Message, error) {
	const query = "SELECT id, channel, author, body, at, msgid FROM messages WHERE lower(channel) = lower(?) AND at >= ? ORDER BY at ASC"
	rows, err := s.db.QueryContext(ctx, query, channel, since.UTC())
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var result []Message
	for rows.Next() {
		var (
			msg   Message
			msgID sql.NullString // msgid is nullable
		)
		if err := rows.Scan(&msg.ID, &msg.Channel, &msg.Author, &msg.Body, &msg.Time, &msgID); err != nil {
			return nil, err
		}
		// String is "" when the column was NULL.
		msg.MsgID = msgID.String
		result = append(result, msg)
	}
	return result, rows.Err()
}
// ListRecentMessages returns up to limit messages of channel (matched
// case-insensitively), newest first. A limit of zero or less is replaced by
// 50. The result is nil when nothing matches. If iteration fails part-way,
// the rows read so far are returned together with the error.
func (s *Store) ListRecentMessages(ctx context.Context, channel string, limit int) ([]Message, error) {
	const defaultLimit = 50
	if limit < 1 {
		limit = defaultLimit
	}

	const query = "SELECT id, channel, author, body, at, msgid FROM messages WHERE lower(channel) = lower(?) ORDER BY at DESC LIMIT ?"
	rows, err := s.db.QueryContext(ctx, query, channel, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var result []Message
	for rows.Next() {
		var (
			msg   Message
			msgID sql.NullString // msgid is nullable
		)
		if err := rows.Scan(&msg.ID, &msg.Channel, &msg.Author, &msg.Body, &msg.Time, &msgID); err != nil {
			return nil, err
		}
		// String is "" when the column was NULL.
		msg.MsgID = msgID.String
		result = append(result, msg)
	}
	return result, rows.Err()
}
// LastMessageTime returns the timestamp of the newest stored message of
// channel (matched case-insensitively). The boolean is false, with a nil
// error, when the channel has no stored messages.
//
// The query selects the "at" column itself, ordered newest first, rather
// than the aggregate MAX(at). This scans the same typed column, into the same
// Go type, as the list queries in this file do.
// NOTE(review): an aggregate expression carries no declared column type, and
// SQLite drivers commonly use the declared type to decide whether to hand
// back a time value — confirm against the driver in use.
func (s *Store) LastMessageTime(ctx context.Context, channel string) (time.Time, bool, error) {
	const query = "SELECT at FROM messages WHERE lower(channel) = lower(?) ORDER BY at DESC LIMIT 1"

	var at time.Time
	err := s.db.QueryRowContext(ctx, query, channel).Scan(&at)
	switch {
	case errors.Is(err, sql.ErrNoRows):
		return time.Time{}, false, nil
	case err != nil:
		return time.Time{}, false, err
	}
	return at, true, nil
}
// DeleteOlderThan removes every message whose timestamp is strictly before
// cutoff (compared in UTC) and returns the number of rows deleted.
func (s *Store) DeleteOlderThan(ctx context.Context, cutoff time.Time) (int64, error) {
	result, err := s.db.ExecContext(ctx, "DELETE FROM messages WHERE at < ?", cutoff.UTC())
	if err == nil {
		return result.RowsAffected()
	}
	return 0, err
}
// ErrNotFound is a sentinel error for "no such record"; compare against it
// with errors.Is. No function visible in this file returns it.
var ErrNotFound = errors.New("not found")

View file

@ -0,0 +1,204 @@
package summarizer
import (
"context"
"io"
"net/http"
"regexp"
"strings"
"time"
openai "github.com/sashabaranov/go-openai"
"sojuboy/internal/config"
"sojuboy/internal/store"
)
// OpenAI is a Summarizer that sends the transcript to a chat-completion API
// through the go-openai client. Create it with NewOpenAI and optionally tune
// it with ApplyConfig.
type OpenAI struct {
// apiKey authenticates requests; Summarize is a no-op while it is empty.
apiKey string
// baseURL overrides the client's default endpoint when non-blank.
baseURL string
// model is the model name; Summarize substitutes "gpt-4o-mini" when blank.
model string
// maxTokens is sent as the request's MaxCompletionTokens.
maxTokens int
// runtime cfg
// followLinks enables fetching snippets of linked pages.
followLinks bool
// linkTimeout is the HTTP client timeout used for each link fetch.
linkTimeout time.Duration
// linkMaxBytes caps how much of each response body is read.
linkMaxBytes int
// groupWindow is the window passed to groupMessages for merging
// consecutive messages of one author.
groupWindow time.Duration
// maxLinks caps how many links are fetched per summary.
maxLinks int
}
// NewOpenAI returns an OpenAI summarizer for the given credentials, endpoint,
// model and completion-token budget. Link following starts enabled with a 6s
// timeout, a 262144-byte read cap and at most 5 links; messages are grouped
// over a 90s window. Use ApplyConfig to override these.
func NewOpenAI(apiKey, baseURL, model string, maxTokens int) *OpenAI {
	o := new(OpenAI)

	o.apiKey = apiKey
	o.baseURL = baseURL
	o.model = model
	o.maxTokens = maxTokens

	// Built-in defaults for the tunable behaviour.
	o.followLinks = true
	o.linkTimeout = 6 * time.Second
	o.linkMaxBytes = 262144
	o.groupWindow = 90 * time.Second
	o.maxLinks = 5
	return o
}
// ApplyConfig overwrites the summarizer's link-following and grouping
// settings with the Summ* values of cfg. All five values are copied as they
// are, including zero values.
func (o *OpenAI) ApplyConfig(cfg config.Config) {
	// Link following.
	o.followLinks, o.maxLinks = cfg.SummFollowLinks, cfg.SummMaxLinks
	o.linkTimeout, o.linkMaxBytes = cfg.SummLinkTimeout, cfg.SummLinkMaxBytes
	// Message grouping.
	o.groupWindow = cfg.SummGroupWindow
}
// Summarize asks the chat-completion API for a concise summary of msgs from
// channel over the given window and returns the trimmed text of the first
// choice.
//
// It returns ("", nil) without calling the API when the receiver is nil or
// has no API key, and also when the response contains no choices. API errors
// are returned unchanged.
//
// Steps: group consecutive messages per author, collect links (fetching
// snippets when link following is on), assemble the prompt, send it. The
// temperature is set to 0.3 only for models not recognised as
// reasoning-style (name starting with "gpt-5" or "o1", or containing
// "reasoning").
func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) {
	if o == nil || o.apiKey == "" {
		return "", nil
	}

	clientCfg := openai.DefaultConfig(o.apiKey)
	if strings.TrimSpace(o.baseURL) != "" {
		clientCfg.BaseURL = o.baseURL
	}
	client := openai.NewClientWithConfig(clientCfg)

	// 1) Group multiline posts from same author within groupWindow
	grouped := groupMessages(msgs, o.groupWindow)

	// 2) Extract links and optionally fetch a small amount of content
	links := extractLinks(grouped)
	if o.followLinks && len(links) > 0 {
		links = fetchLinkSnippets(ctx, links, o.linkTimeout, o.linkMaxBytes, o.maxLinks)
	}

	// 3) Build a concise, natural prompt
	var prompt strings.Builder
	prompt.WriteString("Channel: " + channel)
	prompt.WriteString("\nTime window: " + window.String())
	prompt.WriteString("\n\nTranscript (grouped by author):\n")
	for i := range grouped {
		entry := grouped[i]
		prompt.WriteString(entry.time.Format(time.RFC3339) + " " + entry.author + ": " + entry.text + "\n")
	}
	if len(links) > 0 {
		prompt.WriteString("\nReferenced content (snippets):\n")
		for i := range links {
			prompt.WriteString("- " + links[i].url + " → " + links[i].snippet + "\n")
		}
	}
	for _, instruction := range []string{
		"\nWrite a concise, readable summary of the conversation above.\n",
		"- Focus on what happened and why it matters.\n",
		"- Integrate linked content and pasted multi-line posts naturally.\n",
		"- Avoid rigid sections; use short paragraphs or light bullets if helpful.\n",
		"- Keep it compact but dont omit important context.\n",
	} {
		prompt.WriteString(instruction)
	}

	model := o.model
	if strings.TrimSpace(model) == "" {
		model = "gpt-4o-mini"
	}
	reasoningLike := strings.HasPrefix(model, "gpt-5") ||
		strings.HasPrefix(model, "o1") ||
		strings.Contains(model, "reasoning")

	req := openai.ChatCompletionRequest{
		Model: model,
		Messages: []openai.ChatCompletionMessage{
			{
				Role:    openai.ChatMessageRoleSystem,
				Content: "You summarize IRC transcripts. Be concise, natural, and informative.",
			},
			{
				Role:    openai.ChatMessageRoleUser,
				Content: prompt.String(),
			},
		},
		MaxCompletionTokens: o.maxTokens,
	}
	if !reasoningLike {
		req.Temperature = 0.3
	}

	resp, err := client.CreateChatCompletion(ctx, req)
	switch {
	case err != nil:
		return "", err
	case len(resp.Choices) == 0:
		return "", nil
	}
	return strings.TrimSpace(resp.Choices[0].Message.Content), nil
}
// linkSnippet pairs a URL found in the transcript with the text excerpt
// fetched from it.
type linkSnippet struct {
// url is the link as extracted from a message.
url string
// snippet is the fetched excerpt; it is empty until fetchLinkSnippets
// fills it in.
snippet string
}
// groupedMsg is one transcript entry produced by groupMessages: one or more
// consecutive messages of the same author merged together.
type groupedMsg struct {
// time is the timestamp of the first message in the group.
time time.Time
// author is the sender shared by all messages in the group.
author string
// text holds the message bodies joined with newlines.
text string
}
// groupMessages merges runs of consecutive messages by the same author into
// single entries. A message joins the current entry when its author matches
// and it was sent no more than window after the entry's first message; its
// body is appended on a new line. Otherwise it starts a new entry. The input
// order is kept; empty input yields nil.
func groupMessages(msgs []store.Message, window time.Duration) []groupedMsg {
	var groups []groupedMsg
	for _, m := range msgs {
		if n := len(groups); n > 0 {
			current := &groups[n-1]
			if current.author == m.Author && m.Time.Sub(current.time) <= window {
				current.text += "\n" + m.Body
				continue
			}
		}
		groups = append(groups, groupedMsg{time: m.Time, author: m.Author, text: m.Body})
	}
	return groups
}
// linkRe matches an http(s) URL up to the next whitespace character.
var linkRe = regexp.MustCompile(`https?://\S+`)

// extractLinks returns the distinct http(s) URLs found in msgs, in order of
// first appearance, each with an empty snippet.
//
// Because linkRe runs to the next whitespace, sentence punctuation that
// directly follows a URL would otherwise become part of it; every match is
// cleaned with trimURLPunctuation. Repeated URLs are reported once so they do
// not use up the caller's link budget.
func extractLinks(msgs []groupedMsg) []linkSnippet {
	var links []linkSnippet
	seen := make(map[string]struct{})
	for _, g := range msgs {
		for _, raw := range linkRe.FindAllString(g.text, -1) {
			u := trimURLPunctuation(raw)
			if _, dup := seen[u]; dup {
				continue
			}
			seen[u] = struct{}{}
			links = append(links, linkSnippet{url: u})
		}
	}
	return links
}

// trimURLPunctuation strips trailing characters that almost always belong to
// the surrounding sentence rather than to the URL: . , ; : ! ? ' " >, plus a
// closing ) or ] that has no matching opener inside the URL (so links such as
// ".../Foo_(bar)" keep their parenthesis).
func trimURLPunctuation(u string) string {
	brackets := [][2]string{{"(", ")"}, {"[", "]"}}
	for {
		trimmed := strings.TrimRight(u, `.,;:!?'">`)
		for _, pair := range brackets {
			open, closing := pair[0], pair[1]
			if strings.HasSuffix(trimmed, closing) && strings.Count(trimmed, open) < strings.Count(trimmed, closing) {
				trimmed = strings.TrimSuffix(trimmed, closing)
			}
		}
		if trimmed == u {
			return u
		}
		u = trimmed
	}
}
// fetchLinkSnippets downloads the first maxLinks entries of links and returns
// one linkSnippet per successful fetch, in input order.
//
// Each GET is bound to ctx and to timeout. At most maxBytes of a body are
// read; the text is stripped of carriage returns and surrounding whitespace,
// cut to maxSnippetBytes and cleaned of invalid UTF-8 (which also removes a
// character split by either cut). Links that fail to fetch, answer with a
// non-2xx status or yield no text are left out of the result.
//
// A non-positive maxLinks or maxBytes results in no fetches; the result is
// then empty but non-nil.
//
// NOTE(review): the URLs come from chat messages, so this requests whatever
// address a channel participant posts, including hosts only reachable from
// this machine. Consider restricting destinations before enabling link
// following on untrusted channels.
func fetchLinkSnippets(ctx context.Context, links []linkSnippet, timeout time.Duration, maxBytes int, maxLinks int) []linkSnippet {
	const maxSnippetBytes = 800

	// A negative limit would make the slice expression below panic.
	if maxLinks < 0 {
		maxLinks = 0
	}
	if len(links) > maxLinks {
		links = links[:maxLinks]
	}
	out := make([]linkSnippet, 0, len(links))
	if maxBytes <= 0 {
		return out // nothing could be read; skip the requests entirely
	}

	client := &http.Client{Timeout: timeout}

	// fetch returns the cleaned excerpt for one URL, or "" if unusable. It is
	// a function so the deferred Close runs per link, not at the very end.
	fetch := func(url string) string {
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
		if err != nil {
			return ""
		}
		resp, err := client.Do(req)
		if err != nil {
			return ""
		}
		defer resp.Body.Close()
		if resp.StatusCode < 200 || resp.StatusCode >= 300 {
			return ""
		}
		body, err := io.ReadAll(io.LimitReader(resp.Body, int64(maxBytes)))
		if err != nil || len(body) == 0 {
			return ""
		}
		// naive text cleanup
		text := strings.TrimSpace(strings.ReplaceAll(string(body), "\r", ""))
		if len(text) > maxSnippetBytes {
			text = text[:maxSnippetBytes]
		}
		return strings.ToValidUTF8(text, "")
	}

	for _, ln := range links {
		if snippet := fetch(ln.url); snippet != "" {
			out = append(out, linkSnippet{url: ln.url, snippet: snippet})
		}
	}
	return out
}

View file

@ -0,0 +1,14 @@
package summarizer
import (
"context"
"time"
"sojuboy/internal/store"
)
// Summarizer turns a slice of stored channel messages into a text summary.
type Summarizer interface {
// Summarize returns a summary of msgs, which were collected from channel
// over the given window.
Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error)
}