package summarizer

import (
	"context"
	"encoding/json"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	readability "github.com/go-shiori/go-readability"
	openai "github.com/sashabaranov/go-openai"

	"sojuboy/internal/config"
	"sojuboy/internal/store"
)

// OpenAI produces chat-completion based summaries of IRC transcripts and
// links through the go-openai client.
type OpenAI struct {
	apiKey    string // API key; when empty the Summarize* methods return "" without calling the API
	baseURL   string // optional endpoint override; blank keeps the client default
	model     string // model name; blank falls back to "gpt-4o-mini"
	maxTokens int    // sent as MaxCompletionTokens on each request

	// runtime cfg
	followLinks  bool          // fetch linked pages and add snippets to the prompt
	linkTimeout  time.Duration // timeout applied to link fetches
	linkMaxBytes int           // maximum bytes read from a fetched response body
	groupWindow  time.Duration // window for merging consecutive same-author messages
	maxLinks     int           // cap on fetched links and on attached images
	maxGroups    int           // cap on grouped messages kept (most recent); 0 means no cap
}

// NewOpenAI returns a summarizer with the given credentials and model and
// with built-in runtime defaults: link following enabled, a 6s link timeout,
// 256 KiB per fetched link, a 90s grouping window and at most 5 links.
// maxGroups is left at zero, i.e. no cap.
func NewOpenAI(apiKey, baseURL, model string, maxTokens int) *OpenAI {
	return &OpenAI{
		apiKey:       apiKey,
		baseURL:      baseURL,
		model:        model,
		maxTokens:    maxTokens,
		followLinks:  true,
		linkTimeout:  6 * time.Second,
		linkMaxBytes: 262144,
		groupWindow:  90 * time.Second,
		maxLinks:     5,
	}
}

// ApplyConfig overwrites the runtime settings (link following, link limits,
// grouping window and caps) with the values from the application config.
// The values are copied as-is, without validation.
func (o *OpenAI) ApplyConfig(cfg config.Config) {
	o.followLinks = cfg.SummFollowLinks
	o.linkTimeout = cfg.SummLinkTimeout
	o.linkMaxBytes = cfg.SummLinkMaxBytes
	o.groupWindow = cfg.SummGroupWindow
	o.maxLinks = cfg.SummMaxLinks
	o.maxGroups = cfg.SummMaxGroups
}

// Summarize asks the model for a summary of msgs from channel over window.
//
// It returns ("", nil) when the receiver is nil or no API key is set, the API
// error when the completion call fails, and a locally built fallback summary
// when the model returns no choices or only whitespace.
func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) {
	if o == nil || o.apiKey == "" {
		return "", nil
	}
	cfg := openai.DefaultConfig(o.apiKey)
	if strings.TrimSpace(o.baseURL) != "" {
		cfg.BaseURL = o.baseURL
	}
	client := openai.NewClientWithConfig(cfg)

	// maxLinks comes straight from config; a negative value would make the
	// slice expressions that enforce the cap panic, so treat it as zero.
	maxLinks := o.maxLinks
	if maxLinks < 0 {
		maxLinks = 0
	}

	// 1) Group multiline posts from same author within groupWindow
	grouped := groupMessages(msgs, o.groupWindow)
	// Apply group cap if configured (>0). 0 means no cap. The most recent
	// groups are the ones kept.
	if o.maxGroups > 0 && len(grouped) > o.maxGroups {
		grouped = grouped[len(grouped)-o.maxGroups:]
	}

	// 2) Extract links and optionally fetch content
	// Split image vs non-image: images are attached to the request as image
	// parts, everything else may be fetched and quoted as a text snippet.
	var imageURLs []string
	var nonImageLinks []linkSnippet
	for _, l := range extractLinks(grouped) {
		if isImageURL(l.url) {
			imageURLs = append(imageURLs, l.url)
		} else {
			nonImageLinks = append(nonImageLinks, l)
		}
	}
	if o.followLinks && len(nonImageLinks) > 0 {
		nonImageLinks = fetchLinkSnippets(ctx, nonImageLinks, o.linkTimeout, o.linkMaxBytes, maxLinks)
	}

	// 3) Build a concise, natural prompt
	var b strings.Builder
	b.WriteString("Channel: ")
	b.WriteString(channel)
	b.WriteString("\nTime window: ")
	b.WriteString(window.String())
	b.WriteString("\n\nTranscript (grouped by author):\n")
	for _, g := range grouped {
		b.WriteString(g.time.Format(time.RFC3339))
		b.WriteString(" ")
		b.WriteString(g.author)
		b.WriteString(": ")
		b.WriteString(g.text)
		b.WriteString("\n")
	}
	if len(nonImageLinks) > 0 {
		b.WriteString("\nReferenced content (snippets):\n")
		for _, ln := range nonImageLinks {
			b.WriteString("- ")
			b.WriteString(ln.url)
			b.WriteString(" → ")
			b.WriteString(ln.snippet)
			b.WriteString("\n")
		}
	}
	b.WriteString("\nWrite a concise, readable summary of the conversation above.\n")
	b.WriteString("- Focus on what happened and why it matters.\n")
	b.WriteString("- Integrate linked content and pasted multi-line posts naturally.\n")
	b.WriteString("- Avoid rigid sections; use short paragraphs or light bullets if helpful.\n")
	b.WriteString("- Keep it compact but don’t omit important context.\n")
	prompt := b.String()

	sys := "You summarize IRC transcripts. Be concise, natural, and informative."
	model := o.model
	if strings.TrimSpace(model) == "" {
		model = "gpt-4o-mini"
	}
	// Temperature is left unset for models matched here.
	reasoningLike := strings.HasPrefix(model, "gpt-5") || strings.HasPrefix(model, "o1") || strings.Contains(model, "reasoning")

	// Build multimodal user message parts
	userParts := []openai.ChatMessagePart{{Type: openai.ChatMessagePartTypeText, Text: prompt}}
	// Limit images to maxLinks to avoid overloading
	if len(imageURLs) > maxLinks {
		imageURLs = imageURLs[:maxLinks]
	}
	for _, u := range imageURLs {
		userParts = append(userParts, openai.ChatMessagePart{
			Type:     openai.ChatMessagePartTypeImageURL,
			ImageURL: &openai.ChatMessageImageURL{URL: u},
		})
	}

	req := openai.ChatCompletionRequest{
		Model: model,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleSystem, Content: sys},
			{Role: openai.ChatMessageRoleUser, MultiContent: userParts},
		},
		MaxCompletionTokens: o.maxTokens,
	}
	if !reasoningLike {
		req.Temperature = 0.3
	}

	resp, err := client.CreateChatCompletion(ctx, req)
	if err != nil {
		return "", err
	}
	out := ""
	if len(resp.Choices) > 0 {
		out = strings.TrimSpace(resp.Choices[0].Message.Content)
	}
	if out == "" {
		// No usable model output: fall back to a locally generated digest.
		return localFallbackSummary(grouped, append(nonImageLinks, linksFromImages(imageURLs)...)), nil
	}
	return out, nil
}

// SummarizeForPush produces a digest tailored for push notifications (e.g., Pushover ~1024 chars).
// It uses a slightly more constrained prompt to encourage succinct output.
func (o *OpenAI) SummarizeForPush(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) { if o == nil || o.apiKey == "" { return "", nil } cfg := openai.DefaultConfig(o.apiKey) if strings.TrimSpace(o.baseURL) != "" { cfg.BaseURL = o.baseURL } client := openai.NewClientWithConfig(cfg) grouped := groupMessages(msgs, o.groupWindow) if o.maxGroups > 0 && len(grouped) > o.maxGroups { grouped = grouped[len(grouped)-o.maxGroups:] } links := extractLinks(grouped) var imageURLs []string var nonImageLinks []linkSnippet for _, l := range links { if isImageURL(l.url) { imageURLs = append(imageURLs, l.url) } else { nonImageLinks = append(nonImageLinks, l) } } if o.followLinks && len(nonImageLinks) > 0 { nonImageLinks = fetchLinkSnippets(ctx, nonImageLinks, o.linkTimeout, o.linkMaxBytes, o.maxLinks) } var b strings.Builder b.WriteString("Channel: ") b.WriteString(channel) b.WriteString("\nTime window: ") b.WriteString(window.String()) b.WriteString("\n\nTranscript (grouped by author):\n") for _, g := range grouped { b.WriteString(g.time.Format(time.RFC3339)) b.WriteString(" ") b.WriteString(g.author) b.WriteString(": ") b.WriteString(g.text) b.WriteString("\n") } if len(nonImageLinks) > 0 { b.WriteString("\nReferenced content (snippets):\n") for _, ln := range nonImageLinks { b.WriteString("- ") b.WriteString(ln.url) b.WriteString(" → ") b.WriteString(ln.snippet) b.WriteString("\n") } } b.WriteString("\nWrite a concise, readable summary of the conversation above.\n") b.WriteString("- Focus on what happened and why it matters.\n") b.WriteString("- Integrate linked content and pasted multi-line posts naturally.\n") b.WriteString("- Avoid rigid sections; use short paragraphs or light bullets if helpful.\n") b.WriteString("- Keep it compact but don’t omit important context.\n") b.WriteString("- Keep the final output under ~900 characters suitable for a single push notification.\n") prompt := b.String() sys := "You summarize IRC transcripts 
for a push notification. Be concise, natural, and informative." model := o.model if strings.TrimSpace(model) == "" { model = "gpt-4o-mini" } reasoningLike := strings.HasPrefix(model, "gpt-5") || strings.HasPrefix(model, "o1") || strings.Contains(model, "reasoning") var userParts []openai.ChatMessagePart userParts = append(userParts, openai.ChatMessagePart{Type: openai.ChatMessagePartTypeText, Text: prompt}) for _, u := range imageURLs { userParts = append(userParts, openai.ChatMessagePart{Type: openai.ChatMessagePartTypeImageURL, ImageURL: &openai.ChatMessageImageURL{URL: u}}) } req := openai.ChatCompletionRequest{ Model: model, Messages: []openai.ChatCompletionMessage{ {Role: openai.ChatMessageRoleSystem, Content: sys}, {Role: openai.ChatMessageRoleUser, MultiContent: userParts}, }, MaxCompletionTokens: o.maxTokens, } if !reasoningLike { req.Temperature = 0.3 } resp, err := client.CreateChatCompletion(ctx, req) if err != nil { return "", err } if len(resp.Choices) == 0 { return localFallbackSummary(grouped, append(nonImageLinks, linksFromImages(imageURLs)...)), nil } out := strings.TrimSpace(resp.Choices[0].Message.Content) if out == "" { return localFallbackSummary(grouped, append(nonImageLinks, linksFromImages(imageURLs)...)), nil } return out, nil } func (o *OpenAI) SummarizeLink(ctx context.Context, rawURL string) (string, error) { if o == nil || o.apiKey == "" { return "", nil } cfg := openai.DefaultConfig(o.apiKey) if strings.TrimSpace(o.baseURL) != "" { cfg.BaseURL = o.baseURL } client := openai.NewClientWithConfig(cfg) content := "" title := "" img := "" lu, _ := url.Parse(rawURL) host := strings.ToLower(lu.Host) isYouTube := host == "www.youtube.com" || host == "youtube.com" || host == "m.youtube.com" || host == "youtu.be" ua := "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36" accept := "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" if isImageURL(rawURL) { img = rawURL } 
else if isYouTube { // YouTube: try oEmbed for title + thumbnail watchURL := rawURL if host == "youtu.be" { id := strings.TrimPrefix(lu.Path, "/") watchURL = "https://www.youtube.com/watch?v=" + id } ctx2, cancel := context.WithTimeout(ctx, o.linkTimeout) defer cancel() oembed := "https://www.youtube.com/oembed?format=json&url=" + url.QueryEscape(watchURL) req, _ := http.NewRequestWithContext(ctx2, http.MethodGet, oembed, nil) req.Header.Set("User-Agent", ua) req.Header.Set("Accept", accept) if resp, err := http.DefaultClient.Do(req); err == nil { func() { defer resp.Body.Close() if resp.StatusCode >= 200 && resp.StatusCode < 300 { var oem struct { Title string `json:"title"` Thumb string `json:"thumbnail_url"` } if err := json.NewDecoder(resp.Body).Decode(&oem); err == nil { if oem.Title != "" { title = oem.Title } if oem.Thumb != "" { img = oem.Thumb } } } }() } // No robust transcript grab here; rely on model generalization + title } else if o.followLinks { ctx2, cancel := context.WithTimeout(ctx, o.linkTimeout) defer cancel() req, err := http.NewRequestWithContext(ctx2, http.MethodGet, rawURL, nil) if err == nil { req.Header.Set("User-Agent", ua) req.Header.Set("Accept", accept) req.Header.Set("Accept-Language", "en-US,en;q=0.9") resp, err := http.DefaultClient.Do(req) if err == nil { func() { defer resp.Body.Close() if resp.StatusCode >= 200 && resp.StatusCode < 300 { lr := &io.LimitedReader{R: resp.Body, N: int64(o.linkMaxBytes)} b, _ := io.ReadAll(lr) text := string(b) if base, perr := url.Parse(rawURL); perr == nil { if art, err := readability.FromReader(strings.NewReader(text), base); err == nil { if at := strings.TrimSpace(art.TextContent); at != "" { text = at if title == "" && strings.TrimSpace(art.Title) != "" { title = strings.TrimSpace(art.Title) } } } } text = strings.ReplaceAll(text, "\r", "") text = strings.TrimSpace(text) if len(text) > 6000 { text = text[:6000] } content = text } }() } } } // Build link-specific prompt sys := "You summarize the 
content at a single URL. You are given extracted text, title, or image/thumbnail. If the extract is limited, infer the best short summary from what’s available. Do not say you can’t open links or ask for more text; if there’s truly nothing usable, return '(no summary)'. Be concise and natural." var userParts []openai.ChatMessagePart b := strings.Builder{} b.WriteString("URL: ") b.WriteString(rawURL) b.WriteString("\n") if title != "" { b.WriteString("Title: ") b.WriteString(title) b.WriteString("\n") } b.WriteString("\n") if content != "" { b.WriteString("Extracted content (may be truncated):\n") b.WriteString(content) b.WriteString("\n\n") } b.WriteString("Write a short, skimmable summary of the page/video/image above. If relevant, include key takeaways and any notable cautions. Keep it under a few short paragraphs.") userParts = append(userParts, openai.ChatMessagePart{Type: openai.ChatMessagePartTypeText, Text: b.String()}) if img != "" { userParts = append(userParts, openai.ChatMessagePart{Type: openai.ChatMessagePartTypeImageURL, ImageURL: &openai.ChatMessageImageURL{URL: img}}) } model := o.model if strings.TrimSpace(model) == "" { model = "gpt-4o-mini" } reasoningLike := strings.HasPrefix(model, "gpt-5") || strings.HasPrefix(model, "o1") || strings.Contains(model, "reasoning") req := openai.ChatCompletionRequest{ Model: model, Messages: []openai.ChatCompletionMessage{ {Role: openai.ChatMessageRoleSystem, Content: sys}, {Role: openai.ChatMessageRoleUser, MultiContent: userParts}, }, MaxCompletionTokens: o.maxTokens, } if !reasoningLike { req.Temperature = 0.2 } resp, err := client.CreateChatCompletion(ctx, req) if err != nil { return "", err } if len(resp.Choices) == 0 { return "", nil } return strings.TrimSpace(resp.Choices[0].Message.Content), nil } func linksFromImages(imgs []string) []linkSnippet { out := make([]linkSnippet, 0, len(imgs)) for _, u := range imgs { out = append(out, linkSnippet{url: u}) } return out } func isImageURL(u string) bool { lu := 
strings.ToLower(u) for _, ext := range []string{".jpg", ".jpeg", ".png", ".gif", ".webp"} { if strings.HasSuffix(lu, ext) { return true } } return false } type linkSnippet struct { url string snippet string } type groupedMsg struct { time time.Time author string text string } func groupMessages(msgs []store.Message, window time.Duration) []groupedMsg { if len(msgs) == 0 { return nil } var out []groupedMsg cur := groupedMsg{time: msgs[0].Time, author: msgs[0].Author, text: msgs[0].Body} for i := 1; i < len(msgs); i++ { m := msgs[i] if m.Author == cur.author && m.Time.Sub(cur.time) <= window { cur.text += "\n" + m.Body continue } out = append(out, cur) cur = groupedMsg{time: m.Time, author: m.Author, text: m.Body} } out = append(out, cur) return out } var linkRe = regexp.MustCompile(`https?://\S+`) func extractLinks(msgs []groupedMsg) []linkSnippet { var links []linkSnippet for _, g := range msgs { for _, m := range linkRe.FindAllString(g.text, -1) { links = append(links, linkSnippet{url: m}) } } // de-dup saw := make(map[string]bool) dedup := make([]linkSnippet, 0, len(links)) for _, l := range links { if !saw[l.url] { saw[l.url] = true dedup = append(dedup, l) } } return dedup } func fetchLinkSnippets(ctx context.Context, links []linkSnippet, timeout time.Duration, maxBytes int, maxLinks int) []linkSnippet { client := &http.Client{Timeout: timeout} if len(links) > maxLinks { links = links[:maxLinks] } out := make([]linkSnippet, 0, len(links)) for _, ln := range links { req, err := http.NewRequestWithContext(ctx, http.MethodGet, ln.url, nil) if err != nil { continue } resp, err := client.Do(req) if err != nil { continue } func() { defer resp.Body.Close() if resp.StatusCode < 200 || resp.StatusCode >= 300 { return } limited := io.LimitedReader{R: resp.Body, N: int64(maxBytes)} b, err := io.ReadAll(&limited) if err != nil || len(b) == 0 { return } text := string(b) // Try readability for cleaner article text if baseURL, perr := url.Parse(ln.url); perr == nil { if art, 
err := readability.FromReader(strings.NewReader(text), baseURL); err == nil { if at := strings.TrimSpace(art.TextContent); at != "" { text = at } } } text = strings.ReplaceAll(text, "\r", "") text = strings.TrimSpace(text) if len(text) > 2000 { text = text[:2000] } out = append(out, linkSnippet{url: ln.url, snippet: text}) }() } return out } func localFallbackSummary(grouped []groupedMsg, links []linkSnippet) string { if len(grouped) == 0 { return "" } // simple counts authors := map[string]int{} for _, g := range grouped { authors[g.author]++ } authorList := make([]string, 0, len(authors)) for a := range authors { authorList = append(authorList, a) } sort.Strings(authorList) var b strings.Builder b.WriteString("Summary (fallback)\n") b.WriteString("- Messages: ") b.WriteString(strconvI(len(grouped))) b.WriteString(" groups by ") b.WriteString(strconvI(len(authors))) b.WriteString(" authors\n") if len(links) > 0 { b.WriteString("- Links: ") for i, l := range links { if i > 0 { b.WriteString(", ") } b.WriteString(l.url) } b.WriteString("\n") } // include last few grouped lines as a teaser tail := grouped if len(tail) > 5 { tail = tail[len(tail)-5:] } for _, g := range tail { b.WriteString("• ") b.WriteString(g.author) b.WriteString(": ") line := g.text if len(line) > 200 { line = line[:200] + "…" } b.WriteString(line) b.WriteString("\n") } return strings.TrimSpace(b.String()) } func strconvI(n int) string { return strconv.Itoa(n) }