// Package summarizer builds natural-language summaries of IRC transcripts
// using an OpenAI-compatible chat-completion API, with an offline fallback.
package summarizer

import (
	"context"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"
	"unicode/utf8"

	readability "github.com/go-shiori/go-readability"
	openai "github.com/sashabaranov/go-openai"

	"sojuboy/internal/config"
	"sojuboy/internal/store"
)

// OpenAI summarizes message transcripts via an OpenAI-compatible endpoint.
// The zero value is unusable; construct with NewOpenAI.
type OpenAI struct {
	apiKey    string
	baseURL   string
	model     string
	maxTokens int

	// runtime cfg
	followLinks  bool          // fetch non-image link content and include snippets
	linkTimeout  time.Duration // HTTP client timeout when fetching links
	linkMaxBytes int           // max bytes read per fetched link body
	groupWindow  time.Duration // same-author messages within this window are merged
	maxLinks     int           // cap on fetched links and attached image URLs
	maxGroups    int           // cap on grouped messages sent to the model; 0 = no cap
}

// NewOpenAI returns a summarizer with sensible defaults; override the runtime
// knobs afterwards with ApplyConfig. maxGroups defaults to 0 (no cap).
func NewOpenAI(apiKey, baseURL, model string, maxTokens int) *OpenAI {
	return &OpenAI{
		apiKey:       apiKey,
		baseURL:      baseURL,
		model:        model,
		maxTokens:    maxTokens,
		followLinks:  true,
		linkTimeout:  6 * time.Second,
		linkMaxBytes: 262144,
		groupWindow:  90 * time.Second,
		maxLinks:     5,
	}
}

// ApplyConfig overwrites the runtime tuning knobs from app config.
func (o *OpenAI) ApplyConfig(cfg config.Config) {
	o.followLinks = cfg.SummFollowLinks
	o.linkTimeout = cfg.SummLinkTimeout
	o.linkMaxBytes = cfg.SummLinkMaxBytes
	o.groupWindow = cfg.SummGroupWindow
	o.maxLinks = cfg.SummMaxLinks
	o.maxGroups = cfg.SummMaxGroups
}

// Summarize produces a natural-language summary of msgs for channel covering
// the given time window. It returns ("", nil) when the summarizer is nil or
// unconfigured (no API key), and a locally generated fallback summary when
// the API responds with no usable content.
func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) {
	if o == nil || o.apiKey == "" {
		return "", nil
	}
	cfg := openai.DefaultConfig(o.apiKey)
	if strings.TrimSpace(o.baseURL) != "" {
		cfg.BaseURL = o.baseURL
	}
	client := openai.NewClientWithConfig(cfg)

	// 1) Group multiline posts from the same author within groupWindow.
	grouped := groupMessages(msgs, o.groupWindow)
	// Apply group cap if configured (>0). 0 means no cap. Keep the most
	// recent groups, since those matter most for a summary.
	if o.maxGroups > 0 && len(grouped) > o.maxGroups {
		grouped = grouped[len(grouped)-o.maxGroups:]
	}

	// 2) Extract links; split image vs non-image and optionally fetch the
	// latter for text snippets.
	var imageURLs []string
	var nonImageLinks []linkSnippet
	for _, l := range extractLinks(grouped) {
		if isImageURL(l.url) {
			imageURLs = append(imageURLs, l.url)
		} else {
			nonImageLinks = append(nonImageLinks, l)
		}
	}
	if o.followLinks && len(nonImageLinks) > 0 {
		nonImageLinks = fetchLinkSnippets(ctx, nonImageLinks, o.linkTimeout, o.linkMaxBytes, o.maxLinks)
	}

	// 3) Build a concise, natural prompt.
	prompt := buildPrompt(channel, window, grouped, nonImageLinks)
	sys := "You summarize IRC transcripts. Be concise, natural, and informative."

	model := o.model
	if strings.TrimSpace(model) == "" {
		model = "gpt-4o-mini"
	}
	// NOTE(review): temperature is skipped for reasoning-style models —
	// presumably because those endpoints reject it; confirm against the API.
	reasoningLike := strings.HasPrefix(model, "gpt-5") ||
		strings.HasPrefix(model, "o1") ||
		strings.Contains(model, "reasoning")

	// Build multimodal user message parts: prompt text plus image URLs,
	// limited to o.maxLinks to avoid overloading the request.
	userParts := []openai.ChatMessagePart{{Type: openai.ChatMessagePartTypeText, Text: prompt}}
	if len(imageURLs) > o.maxLinks {
		imageURLs = imageURLs[:o.maxLinks]
	}
	for _, u := range imageURLs {
		userParts = append(userParts, openai.ChatMessagePart{
			Type:     openai.ChatMessagePartTypeImageURL,
			ImageURL: &openai.ChatMessageImageURL{URL: u},
		})
	}

	req := openai.ChatCompletionRequest{
		Model: model,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleSystem, Content: sys},
			{Role: openai.ChatMessageRoleUser, MultiContent: userParts},
		},
		MaxCompletionTokens: o.maxTokens,
	}
	if !reasoningLike {
		req.Temperature = 0.3
	}

	resp, err := client.CreateChatCompletion(ctx, req)
	if err != nil {
		return "", err
	}
	// Local fallback for an empty/contentless API response.
	fallback := func() string {
		return localFallbackSummary(grouped, append(nonImageLinks, linksFromImages(imageURLs)...))
	}
	if len(resp.Choices) == 0 {
		return fallback(), nil
	}
	out := strings.TrimSpace(resp.Choices[0].Message.Content)
	if out == "" {
		return fallback(), nil
	}
	return out, nil
}

// buildPrompt renders the grouped transcript and link snippets into the user
// prompt sent to the model.
func buildPrompt(channel string, window time.Duration, grouped []groupedMsg, links []linkSnippet) string {
	var b strings.Builder
	b.WriteString("Channel: ")
	b.WriteString(channel)
	b.WriteString("\nTime window: ")
	b.WriteString(window.String())
	b.WriteString("\n\nTranscript (grouped by author):\n")
	for _, g := range grouped {
		b.WriteString(g.time.Format(time.RFC3339))
		b.WriteString(" ")
		b.WriteString(g.author)
		b.WriteString(": ")
		b.WriteString(g.text)
		b.WriteString("\n")
	}
	if len(links) > 0 {
		b.WriteString("\nReferenced content (snippets):\n")
		for _, ln := range links {
			b.WriteString("- ")
			b.WriteString(ln.url)
			b.WriteString(" → ")
			b.WriteString(ln.snippet)
			b.WriteString("\n")
		}
	}
	b.WriteString("\nWrite a concise, readable summary of the conversation above.\n")
	b.WriteString("- Focus on what happened and why it matters.\n")
	b.WriteString("- Integrate linked content and pasted multi-line posts naturally.\n")
	b.WriteString("- Avoid rigid sections; use short paragraphs or light bullets if helpful.\n")
	b.WriteString("- Keep it compact but don’t omit important context.\n")
	return b.String()
}

// linksFromImages wraps raw image URLs as linkSnippets (no snippet text) so
// the local fallback summary can list them alongside fetched links.
func linksFromImages(imgs []string) []linkSnippet {
	out := make([]linkSnippet, 0, len(imgs))
	for _, u := range imgs {
		out = append(out, linkSnippet{url: u})
	}
	return out
}

// isImageURL reports whether u looks like a direct image link by extension.
// The query string and fragment are stripped first so URLs such as
// "https://x/img.png?w=640" are still recognized.
func isImageURL(u string) bool {
	lu := strings.ToLower(u)
	if i := strings.IndexAny(lu, "?#"); i >= 0 {
		lu = lu[:i]
	}
	for _, ext := range []string{".jpg", ".jpeg", ".png", ".gif", ".webp"} {
		if strings.HasSuffix(lu, ext) {
			return true
		}
	}
	return false
}

// linkSnippet pairs a URL with an optional extracted text snippet.
type linkSnippet struct {
	url     string
	snippet string
}

// groupedMsg is a run of consecutive messages from one author, joined into a
// single newline-separated text, stamped with the first message's time.
type groupedMsg struct {
	time   time.Time
	author string
	text   string
}

// groupMessages merges consecutive same-author messages into single entries.
// The window is measured from the group's first message, not the previous one.
func groupMessages(msgs []store.Message, window time.Duration) []groupedMsg {
	if len(msgs) == 0 {
		return nil
	}
	var out []groupedMsg
	cur := groupedMsg{time: msgs[0].Time, author: msgs[0].Author, text: msgs[0].Body}
	for _, m := range msgs[1:] {
		if m.Author == cur.author && m.Time.Sub(cur.time) <= window {
			cur.text += "\n" + m.Body
			continue
		}
		out = append(out, cur)
		cur = groupedMsg{time: m.Time, author: m.Author, text: m.Body}
	}
	return append(out, cur)
}

// linkRe matches http(s) URLs; \S+ may keep trailing punctuation attached.
var linkRe = regexp.MustCompile(`https?://\S+`)

// extractLinks collects unique URLs from the grouped transcript, preserving
// order of first appearance.
func extractLinks(msgs []groupedMsg) []linkSnippet {
	seen := make(map[string]bool)
	var out []linkSnippet
	for _, g := range msgs {
		for _, u := range linkRe.FindAllString(g.text, -1) {
			if !seen[u] {
				seen[u] = true
				out = append(out, linkSnippet{url: u})
			}
		}
	}
	return out
}

// fetchLinkSnippets downloads up to maxLinks of the given URLs and returns
// the ones that yielded text, each reduced to a readable snippet of at most
// 2000 bytes. Failed fetches are skipped silently (best effort).
func fetchLinkSnippets(ctx context.Context, links []linkSnippet, timeout time.Duration, maxBytes int, maxLinks int) []linkSnippet {
	client := &http.Client{Timeout: timeout}
	if len(links) > maxLinks {
		links = links[:maxLinks]
	}
	out := make([]linkSnippet, 0, len(links))
	for _, ln := range links {
		if snippet, ok := fetchOneSnippet(ctx, client, ln.url, maxBytes); ok {
			out = append(out, linkSnippet{url: ln.url, snippet: snippet})
		}
	}
	return out
}

// fetchOneSnippet fetches a single URL and extracts text from the first
// maxBytes of the body. ok is false on any request/read error, non-2xx
// status, or empty body. The response body is always closed.
func fetchOneSnippet(ctx context.Context, client *http.Client, rawURL string, maxBytes int) (snippet string, ok bool) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return "", false
	}
	resp, err := client.Do(req)
	if err != nil {
		return "", false
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return "", false
	}
	b, err := io.ReadAll(io.LimitReader(resp.Body, int64(maxBytes)))
	if err != nil || len(b) == 0 {
		return "", false
	}
	text := string(b)
	// Prefer readability-extracted article text when the page parses cleanly.
	if baseURL, perr := url.Parse(rawURL); perr == nil {
		if art, rerr := readability.FromReader(strings.NewReader(text), baseURL); rerr == nil {
			if at := strings.TrimSpace(art.TextContent); at != "" {
				text = at
			}
		}
	}
	text = strings.ReplaceAll(text, "\r", "")
	text = strings.TrimSpace(text)
	// Truncate without splitting a multi-byte UTF-8 rune at the cut point.
	return truncateUTF8(text, 2000), true
}

// truncateUTF8 returns s limited to at most n bytes, backing the cut off so
// it never lands inside a multi-byte UTF-8 rune.
func truncateUTF8(s string, n int) string {
	if len(s) <= n {
		return s
	}
	for n > 0 && !utf8.RuneStart(s[n]) {
		n--
	}
	return s[:n]
}

// localFallbackSummary builds a minimal summary locally when the API returns
// no usable content: group/author counts, the link list, and a teaser of the
// last few grouped lines.
func localFallbackSummary(grouped []groupedMsg, links []linkSnippet) string {
	if len(grouped) == 0 {
		return ""
	}
	// Count distinct authors. (A previous revision also built a sorted
	// author list that was never used; that dead work is removed.)
	authors := map[string]int{}
	for _, g := range grouped {
		authors[g.author]++
	}

	var b strings.Builder
	b.WriteString("Summary (fallback)\n")
	b.WriteString("- Messages: ")
	b.WriteString(strconvI(len(grouped)))
	b.WriteString(" groups by ")
	b.WriteString(strconvI(len(authors)))
	b.WriteString(" authors\n")
	if len(links) > 0 {
		b.WriteString("- Links: ")
		for i, l := range links {
			if i > 0 {
				b.WriteString(", ")
			}
			b.WriteString(l.url)
		}
		b.WriteString("\n")
	}
	// Include the last few grouped lines as a teaser.
	tail := grouped
	if len(tail) > 5 {
		tail = tail[len(tail)-5:]
	}
	for _, g := range tail {
		b.WriteString("• ")
		b.WriteString(g.author)
		b.WriteString(": ")
		line := g.text
		if len(line) > 200 {
			// Rune-safe truncation: never cut a character in half.
			line = truncateUTF8(line, 200) + "…"
		}
		b.WriteString(line)
		b.WriteString("\n")
	}
	return strings.TrimSpace(b.String())
}

// strconvI is a tiny alias for strconv.Itoa kept for brevity at call sites.
func strconvI(n int) string { return strconv.Itoa(n) }