sojuboy/internal/summarizer/openai.go
Thomas Cravey 2954e85e7a feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00

204 lines
5.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package summarizer
import (
"context"
"io"
"net/http"
"regexp"
"strings"
"time"
openai "github.com/sashabaranov/go-openai"
"sojuboy/internal/config"
"sojuboy/internal/store"
)
// OpenAI holds the connection details and tuning knobs used by Summarize to
// produce channel summaries through a chat completion API.
type OpenAI struct {
	// API key, optional base URL override, and model name.
	apiKey, baseURL, model string
	// maxTokens is sent as the completion token limit on each request.
	maxTokens int

	// Runtime settings; NewOpenAI supplies defaults and ApplyConfig replaces them.

	// followLinks enables fetching of URLs mentioned in the transcript.
	followLinks bool
	// linkTimeout is the HTTP client timeout used for link fetches.
	linkTimeout time.Duration
	// linkMaxBytes caps how much of each response body is read.
	linkMaxBytes int
	// groupWindow is the span within which same-author messages are merged.
	groupWindow time.Duration
	// maxLinks caps how many links are fetched per summary.
	maxLinks int
}

// NewOpenAI returns an OpenAI summarizer for the given credentials and model.
// Link following starts enabled with a 6s timeout, a 256 KiB read cap, a 90s
// grouping window, and at most 5 links.
func NewOpenAI(apiKey, baseURL, model string, maxTokens int) *OpenAI {
	o := new(OpenAI)

	o.apiKey = apiKey
	o.baseURL = baseURL
	o.model = model
	o.maxTokens = maxTokens

	o.followLinks = true
	o.linkTimeout = 6 * time.Second
	o.linkMaxBytes = 262144
	o.groupWindow = 90 * time.Second
	o.maxLinks = 5

	return o
}
// ApplyConfig replaces the link-following and message-grouping settings with
// the values carried by the application config. The API key, base URL, model,
// and token limit are left untouched.
//
// Calling it on a nil receiver is a no-op, in line with Summarize, which also
// accepts a nil *OpenAI.
func (o *OpenAI) ApplyConfig(cfg config.Config) {
	if o == nil {
		return
	}
	o.followLinks = cfg.SummFollowLinks
	o.linkTimeout = cfg.SummLinkTimeout
	o.linkMaxBytes = cfg.SummLinkMaxBytes
	o.groupWindow = cfg.SummGroupWindow
	o.maxLinks = cfg.SummMaxLinks
}
// Summarize asks the configured chat model for a short natural-language
// summary of msgs from channel.
//
// The messages are first grouped by author, then, when link following is
// enabled, URLs found in the transcript are fetched and their snippets are
// appended to the prompt. window is only reported to the model as the time
// span covered by the transcript.
//
// It returns ("", nil) when the receiver is nil, when no API key is set, or
// when the API answers without any choices. Errors from the API call are
// returned unchanged.
func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) {
	// An unconfigured summarizer yields "no summary" rather than an error.
	if o == nil || o.apiKey == "" {
		return "", nil
	}

	cfg := openai.DefaultConfig(o.apiKey)
	if strings.TrimSpace(o.baseURL) != "" {
		cfg.BaseURL = o.baseURL
	}
	client := openai.NewClientWithConfig(cfg)

	// 1) Group multiline posts from the same author within groupWindow.
	grouped := groupMessages(msgs, o.groupWindow)

	// 2) Extract links and fetch a small amount of their content. Without
	// fetched content there is nothing useful to add beyond the transcript,
	// which already contains the URLs themselves.
	var links []linkSnippet
	if o.followLinks {
		if found := extractLinks(grouped); len(found) > 0 {
			links = fetchLinkSnippets(ctx, found, o.linkTimeout, o.linkMaxBytes, o.maxLinks)
		}
	}

	// 3) Build a concise, natural prompt.
	var b strings.Builder
	b.WriteString("Channel: ")
	b.WriteString(channel)
	b.WriteString("\nTime window: ")
	b.WriteString(window.String())
	b.WriteString("\n\nTranscript (grouped by author):\n")
	for _, g := range grouped {
		b.WriteString(g.time.Format(time.RFC3339))
		b.WriteString(" ")
		b.WriteString(g.author)
		b.WriteString(": ")
		b.WriteString(g.text)
		b.WriteString("\n")
	}

	// The section header is written lazily so that it only appears when at
	// least one link actually has a snippet to show.
	wroteHeader := false
	for _, ln := range links {
		if ln.snippet == "" {
			continue
		}
		if !wroteHeader {
			b.WriteString("\nReferenced content (snippets):\n")
			wroteHeader = true
		}
		b.WriteString("- ")
		b.WriteString(ln.url)
		b.WriteString(" → ")
		b.WriteString(ln.snippet)
		b.WriteString("\n")
	}

	b.WriteString("\nWrite a concise, readable summary of the conversation above.\n")
	b.WriteString("- Focus on what happened and why it matters.\n")
	b.WriteString("- Integrate linked content and pasted multi-line posts naturally.\n")
	b.WriteString("- Avoid rigid sections; use short paragraphs or light bullets if helpful.\n")
	b.WriteString("- Keep it compact but don't omit important context.\n")
	prompt := b.String()

	sys := "You summarize IRC transcripts. Be concise, natural, and informative."

	// Use the trimmed name everywhere so stray whitespace in the setting
	// cannot defeat the prefix checks below or reach the API.
	model := strings.TrimSpace(o.model)
	if model == "" {
		model = "gpt-4o-mini"
	}

	// Reasoning-style models only accept the default temperature, so the
	// field is left unset for them.
	reasoningLike := strings.Contains(model, "reasoning")
	for _, prefix := range []string{"gpt-5", "o1", "o3", "o4"} {
		if strings.HasPrefix(model, prefix) {
			reasoningLike = true
			break
		}
	}

	req := openai.ChatCompletionRequest{
		Model: model,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleSystem, Content: sys},
			{Role: openai.ChatMessageRoleUser, Content: prompt},
		},
		MaxCompletionTokens: o.maxTokens,
	}
	if !reasoningLike {
		req.Temperature = 0.3
	}

	resp, err := client.CreateChatCompletion(ctx, req)
	if err != nil {
		return "", err
	}
	if len(resp.Choices) == 0 {
		return "", nil
	}
	return strings.TrimSpace(resp.Choices[0].Message.Content), nil
}
// linkSnippet pairs a URL found in the transcript with the text excerpt
// retrieved from it. snippet is empty until the link has been fetched.
type linkSnippet struct {
	url, snippet string
}
// groupedMsg is one transcript entry after grouping: the timestamp of the
// first message in the group, its author, and the newline-joined bodies.
type groupedMsg struct {
	time         time.Time
	author, text string
}
// groupMessages folds consecutive messages from the same author into a single
// entry. A message joins the current group when its author matches and its
// timestamp is no more than window after the group's first message; its body
// is then appended on a new line. Any other message starts a new group.
// An empty input yields nil.
func groupMessages(msgs []store.Message, window time.Duration) []groupedMsg {
	var groups []groupedMsg
	for _, msg := range msgs {
		if n := len(groups); n > 0 {
			// Extend the most recent group in place when the message belongs to it.
			last := &groups[n-1]
			if last.author == msg.Author && msg.Time.Sub(last.time) <= window {
				last.text += "\n" + msg.Body
				continue
			}
		}
		groups = append(groups, groupedMsg{
			time:   msg.Time,
			author: msg.Author,
			text:   msg.Body,
		})
	}
	return groups
}
// linkRe matches http:// and https:// URLs as maximal runs of non-whitespace.
var linkRe = regexp.MustCompile(`https?://\S+`)

// extractLinks returns the distinct URLs mentioned in msgs, in order of first
// appearance, each with an empty snippet.
//
// Because linkRe takes everything up to the next whitespace, sentence
// punctuation and closing brackets that directly follow a URL would otherwise
// become part of it; that tail is trimmed before the URL is recorded.
// Repeated URLs are reported once so they do not consume the caller's link
// budget several times.
func extractLinks(msgs []groupedMsg) []linkSnippet {
	var links []linkSnippet
	seen := make(map[string]struct{})
	for _, g := range msgs {
		for _, raw := range linkRe.FindAllString(g.text, -1) {
			u := trimLinkTail(raw)
			if !linkRe.MatchString(u) {
				// Only punctuation followed the scheme; nothing to fetch.
				continue
			}
			if _, dup := seen[u]; dup {
				continue
			}
			seen[u] = struct{}{}
			links = append(links, linkSnippet{url: u})
		}
	}
	return links
}

// trimLinkTail strips trailing characters that are far more likely to belong
// to the surrounding sentence than to the URL. A closing parenthesis or
// bracket is removed only when it has no matching opener inside the URL, so
// links such as ".../wiki/Go_(programming_language)" stay intact.
func trimLinkTail(u string) string {
	for len(u) > 0 {
		switch u[len(u)-1] {
		case '.', ',', ';', ':', '!', '?', '\'', '"', '>':
			u = u[:len(u)-1]
			continue
		case ')':
			if strings.Count(u, ")") > strings.Count(u, "(") {
				u = u[:len(u)-1]
				continue
			}
		case ']':
			if strings.Count(u, "]") > strings.Count(u, "[") {
				u = u[:len(u)-1]
				continue
			}
		}
		return u
	}
	return u
}
// fetchLinkSnippets downloads up to maxLinks of the given links and returns
// those that produced a usable text snippet, in input order.
//
// timeout is the HTTP client timeout for each request and maxBytes caps how
// much of each response body is read. A negative maxLinks is treated as zero.
// Fetching is best effort: links that fail to build a request, fail to
// download, answer with a non-2xx status, or yield no text are left out.
//
// NOTE(review): the URLs originate from chat messages, so this issues GET
// requests to arbitrary user-supplied addresses, including ones that may
// resolve to internal hosts. Consider restricting the allowed destinations.
func fetchLinkSnippets(ctx context.Context, links []linkSnippet, timeout time.Duration, maxBytes int, maxLinks int) []linkSnippet {
	// Upper bound on the bytes of text kept per link.
	const maxSnippetBytes = 800

	// A negative limit would make the slice expression below panic.
	if maxLinks < 0 {
		maxLinks = 0
	}
	if len(links) > maxLinks {
		links = links[:maxLinks]
	}

	client := &http.Client{Timeout: timeout}
	out := make([]linkSnippet, 0, len(links))
	for _, ln := range links {
		snippet, ok := fetchSnippet(ctx, client, ln.url, maxBytes, maxSnippetBytes)
		if !ok {
			continue
		}
		out = append(out, linkSnippet{url: ln.url, snippet: snippet})
	}
	return out
}

// fetchSnippet GETs rawURL and returns at most maxSnippet bytes of lightly
// cleaned body text. ok is false when the request fails, the status is not
// 2xx, or no text remains after cleanup.
func fetchSnippet(ctx context.Context, client *http.Client, rawURL string, maxBytes, maxSnippet int) (snippet string, ok bool) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return "", false
	}
	resp, err := client.Do(req)
	if err != nil {
		return "", false
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return "", false
	}

	body, err := io.ReadAll(io.LimitReader(resp.Body, int64(maxBytes)))
	if err != nil || len(body) == 0 {
		return "", false
	}

	// Naive text cleanup.
	text := strings.TrimSpace(strings.ReplaceAll(string(body), "\r", ""))
	if len(text) > maxSnippet {
		text = text[:maxSnippet]
	}
	// The byte-based cut above may split a multi-byte rune, and the body may
	// not have been valid UTF-8 to begin with; drop any invalid sequences.
	text = strings.ToValidUTF8(text, "")
	if text == "" {
		return "", false
	}
	return text, true
}