feat: initial Beta 1 release

- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
This commit is contained in:
Thomas Cravey 2025-08-15 18:06:28 -05:00
commit 2954e85e7a
19 changed files with 1983 additions and 0 deletions

View file

@ -0,0 +1,204 @@
package summarizer
import (
"context"
"io"
"net/http"
"regexp"
"strings"
"time"
openai "github.com/sashabaranov/go-openai"
"sojuboy/internal/config"
"sojuboy/internal/store"
)
// OpenAI carries the credentials, model selection and tuning values the
// summarizer uses when it builds and sends a chat-completion request.
type OpenAI struct {
	// apiKey authenticates requests; baseURL, when non-blank, replaces the
	// client library's default endpoint; model names the chat model to use.
	apiKey, baseURL, model string
	// maxTokens is sent as the request's completion-token limit.
	maxTokens int

	// Runtime settings: NewOpenAI fills in defaults and ApplyConfig overwrites them.

	// followLinks enables fetching a snippet of each link found in the transcript.
	followLinks bool
	// linkTimeout is the HTTP client timeout used for link fetches.
	linkTimeout time.Duration
	// linkMaxBytes caps how many bytes are read from each linked response.
	linkMaxBytes int
	// groupWindow is the time span within which one author's messages are merged.
	groupWindow time.Duration
	// maxLinks caps how many links are fetched per summary.
	maxLinks int
}
// NewOpenAI returns a summarizer that uses apiKey, baseURL, model and
// maxTokens exactly as given.
//
// The runtime settings start with these defaults: link following enabled, a
// 6 second link timeout, at most 256 KiB read per link, a 90 second grouping
// window and at most 5 links. ApplyConfig can replace them afterwards.
func NewOpenAI(apiKey, baseURL, model string, maxTokens int) *OpenAI {
	s := new(OpenAI)

	// Caller-supplied values.
	s.apiKey = apiKey
	s.baseURL = baseURL
	s.model = model
	s.maxTokens = maxTokens

	// Built-in defaults.
	s.followLinks = true
	s.linkTimeout = 6 * time.Second
	s.linkMaxBytes = 256 * 1024
	s.groupWindow = 90 * time.Second
	s.maxLinks = 5

	return s
}
// ApplyConfig replaces the summarizer's link-following and message-grouping
// settings with the corresponding Summ* values from cfg. Every value is copied
// as-is, so the defaults chosen by NewOpenAI are overwritten even when a cfg
// field holds its zero value.
func (o *OpenAI) ApplyConfig(cfg config.Config) {
	// Message grouping.
	o.groupWindow = cfg.SummGroupWindow

	// Link handling.
	o.followLinks = cfg.SummFollowLinks
	o.maxLinks = cfg.SummMaxLinks
	o.linkTimeout = cfg.SummLinkTimeout
	o.linkMaxBytes = cfg.SummLinkMaxBytes
}
// Summarize condenses msgs from channel into a short natural-language summary
// using the configured chat-completion model.
//
// Consecutive messages from the same author are merged first (groupMessages),
// links found in the merged text are collected (extractLinks) and, when link
// following is enabled, a snippet of each linked page is fetched and appended
// to the prompt. window is only reported to the model as the time span being
// summarized.
//
// It returns ("", nil) when the receiver is nil, no API key is configured,
// msgs is empty, or the API answers without any choice. An error from the API
// call is returned unchanged.
func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) {
	if o == nil || o.apiKey == "" {
		return "", nil
	}
	// An empty transcript gives the model nothing to work with; skip the API
	// call rather than pay for a made-up summary.
	if len(msgs) == 0 {
		return "", nil
	}

	cfg := openai.DefaultConfig(o.apiKey)
	// Use the trimmed value so stray whitespace from configuration never
	// ends up inside the request URL.
	if baseURL := strings.TrimSpace(o.baseURL); baseURL != "" {
		cfg.BaseURL = baseURL
	}
	client := openai.NewClientWithConfig(cfg)

	// 1) Group multiline posts from same author within groupWindow
	grouped := groupMessages(msgs, o.groupWindow)

	// 2) Extract links and optionally fetch a small amount of content
	links := extractLinks(grouped)
	if o.followLinks && len(links) > 0 {
		links = fetchLinkSnippets(ctx, links, o.linkTimeout, o.linkMaxBytes, o.maxLinks)
	}

	// 3) Build a concise, natural prompt
	var b strings.Builder
	b.WriteString("Channel: ")
	b.WriteString(channel)
	b.WriteString("\nTime window: ")
	b.WriteString(window.String())
	b.WriteString("\n\nTranscript (grouped by author):\n")
	for _, g := range grouped {
		b.WriteString(g.time.Format(time.RFC3339))
		b.WriteString(" ")
		b.WriteString(g.author)
		b.WriteString(": ")
		b.WriteString(g.text)
		b.WriteString("\n")
	}
	if len(links) > 0 {
		b.WriteString("\nReferenced content (snippets):\n")
		for _, ln := range links {
			b.WriteString("- ")
			b.WriteString(ln.url)
			b.WriteString(" → ")
			b.WriteString(ln.snippet)
			b.WriteString("\n")
		}
	}
	b.WriteString("\nWrite a concise, readable summary of the conversation above.\n")
	b.WriteString("- Focus on what happened and why it matters.\n")
	b.WriteString("- Integrate linked content and pasted multi-line posts naturally.\n")
	b.WriteString("- Avoid rigid sections; use short paragraphs or light bullets if helpful.\n")
	b.WriteString("- Keep it compact but dont omit important context.\n")
	prompt := b.String()

	sys := "You summarize IRC transcripts. Be concise, natural, and informative."

	// Trim once and use the trimmed name everywhere, so the prefix checks
	// below and the request itself see the same value.
	model := strings.TrimSpace(o.model)
	if model == "" {
		model = "gpt-4o-mini"
	}

	// Reasoning-style models (the o-series and gpt-5 families) do not accept a
	// custom sampling temperature, so it is only set for other models.
	reasoningLike := strings.Contains(model, "reasoning")
	for _, prefix := range []string{"gpt-5", "o1", "o3", "o4"} {
		if strings.HasPrefix(model, prefix) {
			reasoningLike = true
			break
		}
	}

	req := openai.ChatCompletionRequest{
		Model: model,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleSystem, Content: sys},
			{Role: openai.ChatMessageRoleUser, Content: prompt},
		},
		MaxCompletionTokens: o.maxTokens,
	}
	if !reasoningLike {
		req.Temperature = 0.3
	}

	resp, err := client.CreateChatCompletion(ctx, req)
	if err != nil {
		return "", err
	}
	if len(resp.Choices) == 0 {
		return "", nil
	}
	return strings.TrimSpace(resp.Choices[0].Message.Content), nil
}
// linkSnippet pairs a URL found in the transcript with the text excerpt
// fetched for it; snippet stays empty until content has been fetched.
type linkSnippet struct {
	url, snippet string
}
// groupedMsg is one transcript entry after grouping: the timestamp of the
// group's first message, its author, and the newline-joined bodies of all
// messages merged into it.
type groupedMsg struct {
	time         time.Time
	author, text string
}
// groupMessages folds runs of messages by the same author into single entries.
//
// A message joins the current group when its author matches and its timestamp
// is no more than window after the group's first message; its body is then
// appended on a new line. Any other message starts a new group. The result is
// nil when msgs is empty.
func groupMessages(msgs []store.Message, window time.Duration) []groupedMsg {
	var groups []groupedMsg
	for _, msg := range msgs {
		if n := len(groups); n > 0 {
			// Extend the most recent group in place when the message belongs to it.
			last := &groups[n-1]
			if last.author == msg.Author && msg.Time.Sub(last.time) <= window {
				last.text += "\n" + msg.Body
				continue
			}
		}
		groups = append(groups, groupedMsg{
			time:   msg.Time,
			author: msg.Author,
			text:   msg.Body,
		})
	}
	return groups
}
// linkRe matches an http(s) URL up to the next whitespace character.
var linkRe = regexp.MustCompile(`https?://\S+`)

// extractLinks returns the distinct URLs mentioned in msgs, in order of first
// appearance, each with an empty snippet. It returns nil when no URL is found.
//
// Because linkRe stops only at whitespace, a raw match can carry punctuation
// that belongs to the surrounding sentence ("see https://example.com/a, ...");
// that tail is removed so the URL can actually be fetched. Repeated URLs are
// reported once so they do not use up the caller's link budget.
func extractLinks(msgs []groupedMsg) []linkSnippet {
	var links []linkSnippet
	seen := make(map[string]struct{})
	for _, g := range msgs {
		for _, raw := range linkRe.FindAllString(g.text, -1) {
			u := trimLinkTail(raw)
			if strings.HasSuffix(u, "://") {
				// Only the scheme is left after trimming; not a usable URL.
				continue
			}
			if _, dup := seen[u]; dup {
				continue
			}
			seen[u] = struct{}{}
			links = append(links, linkSnippet{url: u})
		}
	}
	return links
}

// trimLinkTail strips trailing sentence punctuation and quoting from a raw URL
// match. A closing ')' or ']' is removed only when it has no matching opener
// inside the URL, so links such as ".../wiki/Go_(game)" stay intact.
func trimLinkTail(u string) string {
	for len(u) > 0 {
		switch u[len(u)-1] {
		case '.', ',', ';', ':', '!', '?', '\'', '"', '>':
			// Treated as text around the link, never as its last character.
		case ')':
			if strings.Count(u, "(") >= strings.Count(u, ")") {
				return u
			}
		case ']':
			if strings.Count(u, "[") >= strings.Count(u, "]") {
				return u
			}
		default:
			return u
		}
		u = u[:len(u)-1]
	}
	return u
}
// fetchLinkSnippets downloads at most maxLinks of the given links (taken from
// the front of the slice) and returns the ones that produced usable text, each
// with a short snippet of the response body.
//
// Every request is a GET bound to ctx and to the client timeout. A link is
// dropped from the result when the request cannot be built or fails, the
// status is not 2xx, reading the body fails, or no text is left after
// cleanup. At most maxBytes bytes are read per response and at most 800 bytes
// are kept per snippet. A maxLinks of zero or less fetches nothing. The
// returned slice is never nil.
func fetchLinkSnippets(ctx context.Context, links []linkSnippet, timeout time.Duration, maxBytes int, maxLinks int) []linkSnippet {
	// Longest snippet, in bytes, kept per link.
	const snippetLimit = 800

	// A negative limit would make the slice expression below panic; treat it
	// like zero.
	if maxLinks < 0 {
		maxLinks = 0
	}
	if len(links) > maxLinks {
		links = links[:maxLinks]
	}
	out := make([]linkSnippet, 0, len(links))
	if len(links) == 0 {
		return out
	}

	// NOTE(security): the URLs come from chat messages, i.e. untrusted input,
	// and are fetched from wherever this process runs. Nothing here prevents a
	// posted link from pointing at an internal address; restrict the client's
	// dialer or egress if that matters for the deployment.
	client := &http.Client{Timeout: timeout}
	for _, ln := range links {
		// Once the caller has given up, every further request would fail
		// anyway; stop instead of walking the rest of the list.
		if ctx.Err() != nil {
			break
		}
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, ln.url, nil)
		if err != nil {
			continue
		}
		resp, err := client.Do(req)
		if err != nil {
			continue
		}
		// The closure gives each response its own defer, so the body is closed
		// before the next request starts.
		func() {
			defer resp.Body.Close()
			if resp.StatusCode < 200 || resp.StatusCode >= 300 {
				return
			}
			body, err := io.ReadAll(io.LimitReader(resp.Body, int64(maxBytes)))
			if err != nil || len(body) == 0 {
				return
			}
			// naive text cleanup
			text := strings.TrimSpace(strings.ReplaceAll(string(body), "\r", ""))
			if len(text) > snippetLimit {
				text = text[:snippetLimit]
			}
			// Both the read limit and the cut above work on bytes and may
			// split a multi-byte character; drop anything that is not valid
			// UTF-8 so the prompt stays well-formed.
			text = strings.ToValidUTF8(text, "")
			if text == "" {
				return
			}
			out = append(out, linkSnippet{url: ln.url, snippet: text})
		}()
	}
	return out
}