sojuboy/internal/summarizer/openai.go

477 lines
13 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package summarizer
import (
"context"
"encoding/json"
"io"
"net/http"
"net/url"
"regexp"
"sort"
"strconv"
"strings"
"time"
readability "github.com/go-shiori/go-readability"
openai "github.com/sashabaranov/go-openai"
"sojuboy/internal/config"
"sojuboy/internal/store"
)
// OpenAI summarizes chat transcripts and single links through a chat
// completion API reached with the go-openai client.
type OpenAI struct {
	// API settings, supplied to NewOpenAI.
	apiKey, baseURL, model string
	maxTokens              int

	// Runtime behaviour. NewOpenAI sets defaults; ApplyConfig overwrites them.
	followLinks  bool          // fetch linked pages to give the model more context
	linkTimeout  time.Duration // time budget for fetching a linked page
	linkMaxBytes int           // maximum number of bytes read from a linked page
	groupWindow  time.Duration // window used when grouping messages by author
	maxLinks     int           // cap on links fetched and on images attached
	maxGroups    int           // cap on message groups sent; 0 means no cap
}

// NewOpenAI returns a summarizer for the given API key, base URL, model and
// completion token limit. Link following is enabled by default with a 6s
// timeout, a 256 KiB read limit, a 90s grouping window and at most 5 links;
// no group cap is set.
func NewOpenAI(apiKey, baseURL, model string, maxTokens int) *OpenAI {
	o := new(OpenAI)

	// Caller-supplied API settings.
	o.apiKey = apiKey
	o.baseURL = baseURL
	o.model = model
	o.maxTokens = maxTokens

	// Built-in defaults for the runtime behaviour.
	o.followLinks = true
	o.linkTimeout = 6 * time.Second
	o.linkMaxBytes = 256 * 1024
	o.groupWindow = 90 * time.Second
	o.maxLinks = 5

	return o
}
// ApplyConfig copies the summarizer-related settings from cfg onto o,
// replacing whatever values were set before. The values are taken as-is;
// no validation or defaulting is performed here.
func (o *OpenAI) ApplyConfig(cfg config.Config) {
	// Link-following behaviour.
	o.followLinks, o.linkTimeout, o.linkMaxBytes = cfg.SummFollowLinks, cfg.SummLinkTimeout, cfg.SummLinkMaxBytes
	// Grouping window and caps.
	o.groupWindow, o.maxLinks, o.maxGroups = cfg.SummGroupWindow, cfg.SummMaxLinks, cfg.SummMaxGroups
}
// Summarize asks the model for a summary of msgs from channel covering window.
//
// Messages are grouped with groupMessages, links found in the grouped text are
// split into image and non-image URLs, non-image links are optionally fetched
// for snippets, and everything is sent as one multimodal user message.
//
// It returns ("", nil) when the receiver is nil or no API key is configured,
// ("", err) when the chat completion call fails, and the result of
// localFallbackSummary when the API returns no choices or empty content.
func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) {
if o == nil || o.apiKey == "" {
return "", nil
}
cfg := openai.DefaultConfig(o.apiKey)
// Only override the client's default endpoint when a base URL is configured.
if strings.TrimSpace(o.baseURL) != "" {
cfg.BaseURL = o.baseURL
}
client := openai.NewClientWithConfig(cfg)
// 1) Group multiline posts from same author within groupWindow
grouped := groupMessages(msgs, o.groupWindow)
// Apply group cap if configured (>0). 0 means no cap.
// When capped, the most recent groups (the tail of the slice) are kept.
if o.maxGroups > 0 && len(grouped) > o.maxGroups {
grouped = grouped[len(grouped)-o.maxGroups:]
}
// 2) Extract links and optionally fetch content
links := extractLinks(grouped)
// Split image vs non-image: images are attached to the request as image
// parts below, non-image links may be fetched for text snippets.
var imageURLs []string
var nonImageLinks []linkSnippet
for _, l := range links {
if isImageURL(l.url) {
imageURLs = append(imageURLs, l.url)
} else {
nonImageLinks = append(nonImageLinks, l)
}
}
// fetchLinkSnippets returns only the links it could fetch, so after this call
// nonImageLinks may be shorter than before.
if o.followLinks && len(nonImageLinks) > 0 {
nonImageLinks = fetchLinkSnippets(ctx, nonImageLinks, o.linkTimeout, o.linkMaxBytes, o.maxLinks)
}
// 3) Build a concise, natural prompt: header, transcript, optional link
// snippets, then the writing instructions.
var b strings.Builder
b.WriteString("Channel: ")
b.WriteString(channel)
b.WriteString("\nTime window: ")
b.WriteString(window.String())
b.WriteString("\n\nTranscript (grouped by author):\n")
// One transcript line per group: "<RFC3339 time> <author>: <text>".
for _, g := range grouped {
b.WriteString(g.time.Format(time.RFC3339))
b.WriteString(" ")
b.WriteString(g.author)
b.WriteString(": ")
b.WriteString(g.text)
b.WriteString("\n")
}
if len(nonImageLinks) > 0 {
b.WriteString("\nReferenced content (snippets):\n")
for _, ln := range nonImageLinks {
b.WriteString("- ")
b.WriteString(ln.url)
b.WriteString(" → ")
b.WriteString(ln.snippet)
b.WriteString("\n")
}
}
b.WriteString("\nWrite a concise, readable summary of the conversation above.\n")
b.WriteString("- Focus on what happened and why it matters.\n")
b.WriteString("- Integrate linked content and pasted multi-line posts naturally.\n")
b.WriteString("- Avoid rigid sections; use short paragraphs or light bullets if helpful.\n")
b.WriteString("- Keep it compact but dont omit important context.\n")
prompt := b.String()
sys := "You summarize IRC transcripts. Be concise, natural, and informative."
// Fall back to a default model when none is configured.
model := o.model
if strings.TrimSpace(model) == "" {
model = "gpt-4o-mini"
}
// Name-based heuristic; models matching it are sent without a temperature.
// NOTE(review): presumably because such models reject a custom temperature —
// confirm against the API documentation.
reasoningLike := strings.HasPrefix(model, "gpt-5") || strings.HasPrefix(model, "o1") || strings.Contains(model, "reasoning")
// Build multimodal user message parts: the text prompt first, then images.
userParts := []openai.ChatMessagePart{{Type: openai.ChatMessagePartTypeText, Text: prompt}}
// Limit images to o.maxLinks to avoid overloading
maxImgs := o.maxLinks
if len(imageURLs) > maxImgs {
imageURLs = imageURLs[:maxImgs]
}
for _, u := range imageURLs {
userParts = append(userParts, openai.ChatMessagePart{
Type: openai.ChatMessagePartTypeImageURL,
ImageURL: &openai.ChatMessageImageURL{URL: u},
})
}
req := openai.ChatCompletionRequest{
Model: model,
Messages: []openai.ChatCompletionMessage{
{Role: openai.ChatMessageRoleSystem, Content: sys},
{Role: openai.ChatMessageRoleUser, MultiContent: userParts},
},
MaxCompletionTokens: o.maxTokens,
}
if !reasoningLike {
req.Temperature = 0.3
}
resp, err := client.CreateChatCompletion(ctx, req)
if err != nil {
return "", err
}
// No choices or blank content: fall back to a locally built summary that
// lists the fetched links followed by the (capped) image URLs.
if len(resp.Choices) == 0 {
return localFallbackSummary(grouped, append(nonImageLinks, linksFromImages(imageURLs)...)), nil
}
out := strings.TrimSpace(resp.Choices[0].Message.Content)
if out == "" {
return localFallbackSummary(grouped, append(nonImageLinks, linksFromImages(imageURLs)...)), nil
}
return out, nil
}
// SummarizeLink asks the model for a short summary of the single URL rawURL.
//
// The context handed to the model depends on the kind of URL:
//   - image URLs (per isImageURL) are attached as an image part;
//   - YouTube URLs are looked up through YouTube's oEmbed endpoint to obtain
//     a title and thumbnail (no transcript is fetched);
//   - any other URL is fetched when followLinks is enabled: at most
//     linkMaxBytes are read, article text is extracted with go-readability,
//     and the result is capped at 6000 bytes.
//
// Fetching is best-effort: request, network and decoding failures simply leave
// the corresponding context empty.
//
// It returns ("", nil) when the receiver is nil, no API key is configured, or
// the API returns no choices. It returns an error when rawURL cannot be parsed
// or when the chat completion call fails.
func (o *OpenAI) SummarizeLink(ctx context.Context, rawURL string) (string, error) {
	if o == nil || o.apiKey == "" {
		return "", nil
	}
	// url.Parse returns a nil *URL together with the error, so the error must
	// be checked before lu.Host is read; otherwise a malformed URL panics.
	lu, err := url.Parse(rawURL)
	if err != nil {
		return "", err
	}

	cfg := openai.DefaultConfig(o.apiKey)
	if strings.TrimSpace(o.baseURL) != "" {
		cfg.BaseURL = o.baseURL
	}
	client := openai.NewClientWithConfig(cfg)

	content := ""
	title := ""
	img := ""
	host := strings.ToLower(lu.Host)
	isYouTube := host == "www.youtube.com" || host == "youtube.com" || host == "m.youtube.com" || host == "youtu.be"
	// Browser-like request headers used for both the oEmbed and the page fetch.
	ua := "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36"
	accept := "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"

	if isImageURL(rawURL) {
		img = rawURL
	} else if isYouTube {
		// YouTube: try oEmbed for title + thumbnail.
		watchURL := rawURL
		if host == "youtu.be" {
			// Short links carry the video id as the path; expand to a watch URL.
			id := strings.TrimPrefix(lu.Path, "/")
			watchURL = "https://www.youtube.com/watch?v=" + id
		}
		ctx2, cancel := context.WithTimeout(ctx, o.linkTimeout)
		defer cancel()
		oembed := "https://www.youtube.com/oembed?format=json&url=" + url.QueryEscape(watchURL)
		// A request that cannot be built is skipped rather than dereferenced.
		if req, rerr := http.NewRequestWithContext(ctx2, http.MethodGet, oembed, nil); rerr == nil {
			req.Header.Set("User-Agent", ua)
			req.Header.Set("Accept", accept)
			if resp, derr := http.DefaultClient.Do(req); derr == nil {
				func() {
					defer resp.Body.Close()
					if resp.StatusCode < 200 || resp.StatusCode >= 300 {
						return
					}
					var oem struct {
						Title string `json:"title"`
						Thumb string `json:"thumbnail_url"`
					}
					if jerr := json.NewDecoder(resp.Body).Decode(&oem); jerr != nil {
						return
					}
					if oem.Title != "" {
						title = oem.Title
					}
					if oem.Thumb != "" {
						img = oem.Thumb
					}
				}()
			}
		}
		// No robust transcript grab here; rely on model generalization + title.
	} else if o.followLinks {
		ctx2, cancel := context.WithTimeout(ctx, o.linkTimeout)
		defer cancel()
		if req, rerr := http.NewRequestWithContext(ctx2, http.MethodGet, rawURL, nil); rerr == nil {
			req.Header.Set("User-Agent", ua)
			req.Header.Set("Accept", accept)
			req.Header.Set("Accept-Language", "en-US,en;q=0.9")
			if resp, derr := http.DefaultClient.Do(req); derr == nil {
				func() {
					defer resp.Body.Close()
					if resp.StatusCode < 200 || resp.StatusCode >= 300 {
						return
					}
					lr := &io.LimitedReader{R: resp.Body, N: int64(o.linkMaxBytes)}
					// Best-effort: on a read error whatever was received so far
					// is still used, so the error is deliberately ignored.
					raw, _ := io.ReadAll(lr)
					text := string(raw)
					// Prefer the readable article text; keep the raw body when
					// extraction fails or yields nothing.
					if art, aerr := readability.FromReader(strings.NewReader(text), lu); aerr == nil {
						if at := strings.TrimSpace(art.TextContent); at != "" {
							text = at
							if title == "" && strings.TrimSpace(art.Title) != "" {
								title = strings.TrimSpace(art.Title)
							}
						}
					}
					text = strings.ReplaceAll(text, "\r", "")
					text = strings.TrimSpace(text)
					if len(text) > 6000 {
						// The cut is made on bytes and may land inside a
						// multi-byte character; ToValidUTF8 removes invalid
						// sequences, including such a trailing partial rune.
						text = strings.ToValidUTF8(text[:6000], "")
					}
					content = text
				}()
			}
		}
	}

	// Build link-specific prompt.
	sys := "You summarize the content at a single URL. You are given extracted text, title, or image/thumbnail. If the extract is limited, infer the best short summary from whats available. Do not say you cant open links or ask for more text; if theres truly nothing usable, return '(no summary)'. Be concise and natural."
	var b strings.Builder
	b.WriteString("URL: ")
	b.WriteString(rawURL)
	b.WriteString("\n")
	if title != "" {
		b.WriteString("Title: ")
		b.WriteString(title)
		b.WriteString("\n")
	}
	b.WriteString("\n")
	if content != "" {
		b.WriteString("Extracted content (may be truncated):\n")
		b.WriteString(content)
		b.WriteString("\n\n")
	}
	b.WriteString("Write a short, skimmable summary of the page/video/image above. If relevant, include key takeaways and any notable cautions. Keep it under a few short paragraphs.")

	// The text prompt always comes first; the image or thumbnail, if any, follows.
	var userParts []openai.ChatMessagePart
	userParts = append(userParts, openai.ChatMessagePart{Type: openai.ChatMessagePartTypeText, Text: b.String()})
	if img != "" {
		userParts = append(userParts, openai.ChatMessagePart{Type: openai.ChatMessagePartTypeImageURL, ImageURL: &openai.ChatMessageImageURL{URL: img}})
	}

	// Fall back to a default model when none is configured.
	model := o.model
	if strings.TrimSpace(model) == "" {
		model = "gpt-4o-mini"
	}
	// Name-based heuristic; models matching it are sent without a temperature.
	reasoningLike := strings.HasPrefix(model, "gpt-5") || strings.HasPrefix(model, "o1") || strings.Contains(model, "reasoning")
	req := openai.ChatCompletionRequest{
		Model: model,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleSystem, Content: sys},
			{Role: openai.ChatMessageRoleUser, MultiContent: userParts},
		},
		MaxCompletionTokens: o.maxTokens,
	}
	if !reasoningLike {
		req.Temperature = 0.2
	}
	resp, err := client.CreateChatCompletion(ctx, req)
	if err != nil {
		return "", err
	}
	if len(resp.Choices) == 0 {
		return "", nil
	}
	return strings.TrimSpace(resp.Choices[0].Message.Content), nil
}
// linksFromImages wraps each image URL in a linkSnippet with an empty
// snippet, preserving order. The result is never nil, even for no input.
func linksFromImages(imgs []string) []linkSnippet {
	wrapped := make([]linkSnippet, len(imgs))
	for i := range imgs {
		wrapped[i].url = imgs[i]
	}
	return wrapped
}
// isImageURL reports whether u looks like a link to an image, judged by a
// case-insensitive file extension (.jpg, .jpeg, .png, .gif, .webp).
//
// The extension is accepted at the end of the raw string or at the end of the
// URL's path, so a query string or fragment after the file name (for example
// "pic.png?width=640" or "pic.webp#top") does not hide it. Strings that cannot
// be parsed as a URL are judged on their raw text only.
func isImageURL(u string) bool {
	hasImageExt := func(s string) bool {
		s = strings.ToLower(s)
		for _, ext := range []string{".jpg", ".jpeg", ".png", ".gif", ".webp"} {
			if strings.HasSuffix(s, ext) {
				return true
			}
		}
		return false
	}
	if hasImageExt(u) {
		return true
	}
	// Look at the path alone so "?query" and "#fragment" suffixes are ignored.
	parsed, err := url.Parse(u)
	return err == nil && hasImageExt(parsed.Path)
}
// linkSnippet pairs a URL found in the transcript with the text extracted
// from it. snippet is empty when the link has not been (or could not be)
// fetched.
type linkSnippet struct {
	url, snippet string
}
// groupedMsg is one or more consecutive messages by the same author merged
// into a single transcript entry.
type groupedMsg struct {
	time         time.Time // time of the first message in the group
	author, text string    // text holds the message bodies joined by newlines
}
// groupMessages merges runs of consecutive messages by the same author into
// single entries, joining their bodies with newlines.
//
// A message joins the current group only while it is at most window after the
// group's first message; otherwise it starts a new group. The result is nil
// for empty input.
func groupMessages(msgs []store.Message, window time.Duration) []groupedMsg {
	var groups []groupedMsg
	for _, msg := range msgs {
		if n := len(groups); n > 0 {
			// Extend the most recent group in place when author and time match.
			last := &groups[n-1]
			if last.author == msg.Author && msg.Time.Sub(last.time) <= window {
				last.text += "\n" + msg.Body
				continue
			}
		}
		groups = append(groups, groupedMsg{time: msg.Time, author: msg.Author, text: msg.Body})
	}
	return groups
}
// linkRe matches an http or https URL: the scheme followed by everything up
// to the next whitespace character.
var linkRe = regexp.MustCompile(`https?://\S+`)

// extractLinks returns every distinct URL found in the grouped messages, in
// order of first appearance, each with an empty snippet. The result is never
// nil.
func extractLinks(msgs []groupedMsg) []linkSnippet {
	seen := make(map[string]struct{})
	unique := []linkSnippet{}
	for i := range msgs {
		for _, found := range linkRe.FindAllString(msgs[i].text, -1) {
			if _, dup := seen[found]; dup {
				continue
			}
			seen[found] = struct{}{}
			unique = append(unique, linkSnippet{url: found})
		}
	}
	return unique
}
// fetchLinkSnippets downloads the first maxLinks entries of links and returns
// a text snippet for each one that could be fetched, in the original order.
//
// For every link it issues a GET bound to ctx with the given per-request
// timeout, reads at most maxBytes of the body, tries to extract the readable
// article text with go-readability (falling back to the raw body), and caps
// the snippet at 2000 bytes. Links whose request fails, whose status is not
// 2xx, or whose body is empty or unreadable are left out of the result.
// A maxLinks of zero or less fetches nothing.
func fetchLinkSnippets(ctx context.Context, links []linkSnippet, timeout time.Duration, maxBytes int, maxLinks int) []linkSnippet {
	// A negative cap would make the slice expression below panic; treat it
	// like zero.
	if maxLinks < 0 {
		maxLinks = 0
	}
	if len(links) > maxLinks {
		links = links[:maxLinks]
	}
	client := &http.Client{Timeout: timeout}

	// fetch returns the snippet for one URL and whether it should be kept.
	// Being a function, its deferred Body.Close runs per link rather than at
	// the end of the whole loop.
	fetch := func(rawURL string) (string, bool) {
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
		if err != nil {
			return "", false
		}
		resp, err := client.Do(req)
		if err != nil {
			return "", false
		}
		defer resp.Body.Close()
		if resp.StatusCode < 200 || resp.StatusCode >= 300 {
			return "", false
		}
		limited := io.LimitedReader{R: resp.Body, N: int64(maxBytes)}
		body, err := io.ReadAll(&limited)
		if err != nil || len(body) == 0 {
			return "", false
		}
		text := string(body)
		// Try readability for cleaner article text.
		if baseURL, perr := url.Parse(rawURL); perr == nil {
			if art, aerr := readability.FromReader(strings.NewReader(text), baseURL); aerr == nil {
				if at := strings.TrimSpace(art.TextContent); at != "" {
					text = at
				}
			}
		}
		text = strings.TrimSpace(strings.ReplaceAll(text, "\r", ""))
		if len(text) > 2000 {
			// The cut is made on bytes and may land inside a multi-byte
			// character; ToValidUTF8 removes invalid sequences, including
			// such a trailing partial rune.
			text = strings.ToValidUTF8(text[:2000], "")
		}
		return text, true
	}

	out := make([]linkSnippet, 0, len(links))
	for _, ln := range links {
		if snippet, ok := fetch(ln.url); ok {
			out = append(out, linkSnippet{url: ln.url, snippet: snippet})
		}
	}
	return out
}
// localFallbackSummary builds a plain-text summary without calling the model.
//
// The output has a "Summary (fallback)" header, a line with the number of
// groups and distinct authors, an optional comma-separated list of link URLs,
// and up to the last five groups as "• author: text" lines, each text capped
// at 200 bytes plus an ellipsis. It returns "" when grouped is empty.
func localFallbackSummary(grouped []groupedMsg, links []linkSnippet) string {
	if len(grouped) == 0 {
		return ""
	}
	// Count groups per author; only the number of distinct authors is reported.
	authors := map[string]int{}
	for _, g := range grouped {
		authors[g.author]++
	}
	// NOTE(review): the sorted author names are computed but never written to
	// the output; kept so the result format stays unchanged.
	authorList := make([]string, 0, len(authors))
	for a := range authors {
		authorList = append(authorList, a)
	}
	sort.Strings(authorList)

	var b strings.Builder
	b.WriteString("Summary (fallback)\n")
	b.WriteString("- Messages: ")
	b.WriteString(strconvI(len(grouped)))
	b.WriteString(" groups by ")
	b.WriteString(strconvI(len(authors)))
	b.WriteString(" authors\n")
	if len(links) > 0 {
		b.WriteString("- Links: ")
		for i, l := range links {
			if i > 0 {
				b.WriteString(", ")
			}
			b.WriteString(l.url)
		}
		b.WriteString("\n")
	}
	// Include the last few grouped lines as a teaser.
	tail := grouped
	if len(tail) > 5 {
		tail = tail[len(tail)-5:]
	}
	for _, g := range tail {
		b.WriteString("• ")
		b.WriteString(g.author)
		b.WriteString(": ")
		line := g.text
		if len(line) > 200 {
			// The cut is made on bytes and may land inside a multi-byte
			// character; ToValidUTF8 removes invalid sequences, including
			// such a trailing partial rune, so the output stays valid UTF-8.
			line = strings.ToValidUTF8(line[:200], "") + "…"
		}
		b.WriteString(line)
		b.WriteString("\n")
	}
	return strings.TrimSpace(b.String())
}
// strconvI formats n as a base-10 string.
func strconvI(n int) string {
	const decimal = 10
	return strconv.FormatInt(int64(n), decimal)
}