- soju raw connector with event playback and CHATHISTORY fallback - SQLite store with msgid de-dup and retention job - Mentions + Pushover + tuning; structured JSON logs - Summaries: concise, link-following, multi-line grouping - HTTP: /healthz, /ready, /tail, /trigger, /metrics - Docker: distroless, healthcheck, version metadata - Docs: README, CHANGELOG, compose
204 lines
5.3 KiB
Go
204 lines
5.3 KiB
Go
package summarizer
|
||
|
||
import (
|
||
"context"
|
||
"io"
|
||
"net/http"
|
||
"regexp"
|
||
"strings"
|
||
"time"
|
||
|
||
openai "github.com/sashabaranov/go-openai"
|
||
|
||
"sojuboy/internal/config"
|
||
"sojuboy/internal/store"
|
||
)
|
||
|
||
// OpenAI summarizes chat transcripts through an OpenAI-compatible
// chat-completion endpoint. Construct it with NewOpenAI; the link and grouping
// settings can be overridden afterwards with ApplyConfig.
type OpenAI struct {
	// apiKey authenticates against the API; Summarize is a no-op when it is empty.
	apiKey string
	// baseURL overrides the client's default endpoint when it is not blank.
	baseURL string
	// model is the chat model name; a blank value falls back to a default in Summarize.
	model string
	// maxTokens is sent as the completion-token limit of each request.
	maxTokens int

	// Runtime configuration.

	// followLinks enables downloading a snippet for links found in the transcript.
	followLinks bool
	// linkTimeout is the HTTP client timeout used while fetching links.
	linkTimeout time.Duration
	// linkMaxBytes caps how many bytes are read from each fetched response body.
	linkMaxBytes int
	// groupWindow is the time span within which messages by one author are merged.
	groupWindow time.Duration
	// maxLinks caps how many links are fetched per summary.
	maxLinks int
}
|
||
|
||
func NewOpenAI(apiKey, baseURL, model string, maxTokens int) *OpenAI {
|
||
return &OpenAI{apiKey: apiKey, baseURL: baseURL, model: model, maxTokens: maxTokens,
|
||
followLinks: true, linkTimeout: 6 * time.Second, linkMaxBytes: 262144, groupWindow: 90 * time.Second, maxLinks: 5,
|
||
}
|
||
}
|
||
|
||
// Configure from app config
|
||
func (o *OpenAI) ApplyConfig(cfg config.Config) {
|
||
o.followLinks = cfg.SummFollowLinks
|
||
o.linkTimeout = cfg.SummLinkTimeout
|
||
o.linkMaxBytes = cfg.SummLinkMaxBytes
|
||
o.groupWindow = cfg.SummGroupWindow
|
||
o.maxLinks = cfg.SummMaxLinks
|
||
}
|
||
|
||
func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) {
|
||
if o == nil || o.apiKey == "" {
|
||
return "", nil
|
||
}
|
||
cfg := openai.DefaultConfig(o.apiKey)
|
||
if strings.TrimSpace(o.baseURL) != "" {
|
||
cfg.BaseURL = o.baseURL
|
||
}
|
||
client := openai.NewClientWithConfig(cfg)
|
||
|
||
// 1) Group multiline posts from same author within groupWindow
|
||
grouped := groupMessages(msgs, o.groupWindow)
|
||
|
||
// 2) Extract links and optionally fetch a small amount of content
|
||
links := extractLinks(grouped)
|
||
if o.followLinks && len(links) > 0 {
|
||
links = fetchLinkSnippets(ctx, links, o.linkTimeout, o.linkMaxBytes, o.maxLinks)
|
||
}
|
||
|
||
// 3) Build a concise, natural prompt
|
||
var b strings.Builder
|
||
b.WriteString("Channel: ")
|
||
b.WriteString(channel)
|
||
b.WriteString("\nTime window: ")
|
||
b.WriteString(window.String())
|
||
b.WriteString("\n\nTranscript (grouped by author):\n")
|
||
for _, g := range grouped {
|
||
b.WriteString(g.time.Format(time.RFC3339))
|
||
b.WriteString(" ")
|
||
b.WriteString(g.author)
|
||
b.WriteString(": ")
|
||
b.WriteString(g.text)
|
||
b.WriteString("\n")
|
||
}
|
||
if len(links) > 0 {
|
||
b.WriteString("\nReferenced content (snippets):\n")
|
||
for _, ln := range links {
|
||
b.WriteString("- ")
|
||
b.WriteString(ln.url)
|
||
b.WriteString(" → ")
|
||
b.WriteString(ln.snippet)
|
||
b.WriteString("\n")
|
||
}
|
||
}
|
||
b.WriteString("\nWrite a concise, readable summary of the conversation above.\n")
|
||
b.WriteString("- Focus on what happened and why it matters.\n")
|
||
b.WriteString("- Integrate linked content and pasted multi-line posts naturally.\n")
|
||
b.WriteString("- Avoid rigid sections; use short paragraphs or light bullets if helpful.\n")
|
||
b.WriteString("- Keep it compact but don’t omit important context.\n")
|
||
prompt := b.String()
|
||
|
||
sys := "You summarize IRC transcripts. Be concise, natural, and informative."
|
||
|
||
model := o.model
|
||
if strings.TrimSpace(model) == "" {
|
||
model = "gpt-4o-mini"
|
||
}
|
||
reasoningLike := strings.HasPrefix(model, "gpt-5") || strings.HasPrefix(model, "o1") || strings.Contains(model, "reasoning")
|
||
|
||
req := openai.ChatCompletionRequest{
|
||
Model: model,
|
||
Messages: []openai.ChatCompletionMessage{
|
||
{Role: openai.ChatMessageRoleSystem, Content: sys},
|
||
{Role: openai.ChatMessageRoleUser, Content: prompt},
|
||
},
|
||
MaxCompletionTokens: o.maxTokens,
|
||
}
|
||
if !reasoningLike {
|
||
req.Temperature = 0.3
|
||
}
|
||
|
||
resp, err := client.CreateChatCompletion(ctx, req)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
if len(resp.Choices) == 0 {
|
||
return "", nil
|
||
}
|
||
return strings.TrimSpace(resp.Choices[0].Message.Content), nil
|
||
}
|
||
|
||
// linkSnippet pairs a URL found in the transcript with the text excerpt
// downloaded for it. snippet stays empty until the link has been fetched.
type linkSnippet struct {
	// url is the link as it appeared in a message.
	url string
	// snippet is the cleaned-up beginning of the fetched response body.
	snippet string
}
|
||
|
||
// groupedMsg is one transcript entry after consecutive messages from the same
// author have been merged.
type groupedMsg struct {
	// time is the timestamp of the first message in the group.
	time time.Time
	// author is the sender shared by every message in the group.
	author string
	// text holds the message bodies joined with newlines.
	text string
}
|
||
|
||
func groupMessages(msgs []store.Message, window time.Duration) []groupedMsg {
|
||
if len(msgs) == 0 {
|
||
return nil
|
||
}
|
||
var out []groupedMsg
|
||
cur := groupedMsg{time: msgs[0].Time, author: msgs[0].Author, text: msgs[0].Body}
|
||
for i := 1; i < len(msgs); i++ {
|
||
m := msgs[i]
|
||
if m.Author == cur.author && m.Time.Sub(cur.time) <= window {
|
||
cur.text += "\n" + m.Body
|
||
continue
|
||
}
|
||
out = append(out, cur)
|
||
cur = groupedMsg{time: m.Time, author: m.Author, text: m.Body}
|
||
}
|
||
out = append(out, cur)
|
||
return out
|
||
}
|
||
|
||
var linkRe = regexp.MustCompile(`https?://\S+`)
|
||
|
||
func extractLinks(msgs []groupedMsg) []linkSnippet {
|
||
var links []linkSnippet
|
||
for _, g := range msgs {
|
||
for _, m := range linkRe.FindAllString(g.text, -1) {
|
||
links = append(links, linkSnippet{url: m})
|
||
}
|
||
}
|
||
return links
|
||
}
|
||
|
||
func fetchLinkSnippets(ctx context.Context, links []linkSnippet, timeout time.Duration, maxBytes int, maxLinks int) []linkSnippet {
|
||
client := &http.Client{Timeout: timeout}
|
||
if len(links) > maxLinks {
|
||
links = links[:maxLinks]
|
||
}
|
||
out := make([]linkSnippet, 0, len(links))
|
||
for _, ln := range links {
|
||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, ln.url, nil)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
resp, err := client.Do(req)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
func() {
|
||
defer resp.Body.Close()
|
||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||
return
|
||
}
|
||
limited := io.LimitedReader{R: resp.Body, N: int64(maxBytes)}
|
||
b, err := io.ReadAll(&limited)
|
||
if err != nil || len(b) == 0 {
|
||
return
|
||
}
|
||
// naive text cleanup
|
||
text := string(b)
|
||
text = strings.ReplaceAll(text, "\r", "")
|
||
text = strings.TrimSpace(text)
|
||
if len(text) > 800 {
|
||
text = text[:800]
|
||
}
|
||
out = append(out, linkSnippet{url: ln.url, snippet: text})
|
||
}()
|
||
}
|
||
return out
|
||
}
|