feat(link-summ): add SummarizeLink API and server integration; youtube oEmbed returns HTML only to avoid duplicate thumbs

This commit is contained in:
Thomas Cravey 2025-08-17 18:52:39 -05:00
parent 29d94c13d5
commit 575622b45c
3 changed files with 116 additions and 17 deletions

View file

@ -257,7 +257,9 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
respTw, errTw := client.Do(reqTw) respTw, errTw := client.Do(reqTw)
if errTw == nil && respTw.StatusCode >= 200 && respTw.StatusCode < 300 { if errTw == nil && respTw.StatusCode >= 200 && respTw.StatusCode < 300 {
defer respTw.Body.Close() defer respTw.Body.Close()
var o struct{ HTML string `json:"html"` } var o struct {
HTML string `json:"html"`
}
if err := json.NewDecoder(respTw.Body).Decode(&o); err == nil && o.HTML != "" { if err := json.NewDecoder(respTw.Body).Decode(&o); err == nil && o.HTML != "" {
card := linkCard{URL: raw, HTML: o.HTML} card := linkCard{URL: raw, HTML: o.HTML}
s.cardCache[raw] = card s.cardCache[raw] = card
@ -291,7 +293,8 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
HTML string `json:"html"` HTML string `json:"html"`
} }
if err := json.NewDecoder(respY.Body).Decode(&o); err == nil { if err := json.NewDecoder(respY.Body).Decode(&o); err == nil {
card := linkCard{URL: raw, Title: o.Title, Image: o.Thumb, HTML: o.HTML} card := linkCard{URL: raw, Title: o.Title, HTML: o.HTML}
// Note: do not set Image when HTML is provided to avoid duplicate thumbnails (embed already includes preview)
s.cardCache[raw] = card s.cardCache[raw] = card
s.cardCacheExp[raw] = time.Now().Add(24 * time.Hour) s.cardCacheExp[raw] = time.Now().Add(24 * time.Hour)
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
@ -422,25 +425,38 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte("summarizer not configured")) _, _ = w.Write([]byte("summarizer not configured"))
return return
} }
if s.summaryCache == nil { if s.summaryCache == nil { s.summaryCache = make(map[string]string) }
s.summaryCache = make(map[string]string) if s.summaryCacheExp == nil { s.summaryCacheExp = make(map[string]time.Time) }
}
if s.summaryCacheExp == nil {
s.summaryCacheExp = make(map[string]time.Time)
}
if exp, ok := s.summaryCacheExp[raw]; ok && time.Now().Before(exp) { if exp, ok := s.summaryCacheExp[raw]; ok && time.Now().Before(exp) {
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(map[string]any{"summary": s.summaryCache[raw]}) _ = json.NewEncoder(w).Encode(map[string]any{"summary": s.summaryCache[raw]})
return return
} }
// Prefer link-specific summarization when available
if sl, ok := s.Summarizer.(interface{ SummarizeLink(context.Context, string) (string, error) }); ok {
tout := s.SummarizerTimeout
if tout <= 0 { tout = 5 * time.Minute }
if tout > 2*time.Minute { tout = 2 * time.Minute }
ctx, cancel := context.WithTimeout(r.Context(), tout)
defer cancel()
sum, err := sl.SummarizeLink(ctx, raw)
if err != nil {
w.WriteHeader(http.StatusBadGateway)
_, _ = w.Write([]byte("summarizer error"))
return
}
if sum == "" { sum = "(no summary)" }
s.summaryCache[raw] = sum
s.summaryCacheExp[raw] = time.Now().Add(24 * time.Hour)
w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(map[string]any{"summary": sum})
return
}
// Fallback to generic path
msgs := []store.Message{{Channel: "#links", Author: "link", Body: raw, Time: time.Now().UTC()}} msgs := []store.Message{{Channel: "#links", Author: "link", Body: raw, Time: time.Now().UTC()}}
tout := s.SummarizerTimeout tout := s.SummarizerTimeout
if tout <= 0 { if tout <= 0 { tout = 5 * time.Minute }
tout = 5 * time.Minute if tout > 2*time.Minute { tout = 2 * time.Minute }
}
if tout > 2*time.Minute {
tout = 2 * time.Minute
}
ctx, cancel := context.WithTimeout(r.Context(), tout) ctx, cancel := context.WithTimeout(r.Context(), tout)
defer cancel() defer cancel()
sum, err := s.Summarizer.Summarize(ctx, "#links", msgs, 0) sum, err := s.Summarizer.Summarize(ctx, "#links", msgs, 0)
@ -449,9 +465,7 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte("summarizer error")) _, _ = w.Write([]byte("summarizer error"))
return return
} }
if sum == "" { if sum == "" { sum = "(no summary)" }
sum = "(no summary)"
}
s.summaryCache[raw] = sum s.summaryCache[raw] = sum
s.summaryCacheExp[raw] = time.Now().Add(24 * time.Hour) s.summaryCacheExp[raw] = time.Now().Add(24 * time.Hour)
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")

View file

@ -161,6 +161,90 @@ func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Mes
return out, nil return out, nil
} }
// SummarizeLink asks the chat-completion model for a short summary of the
// single resource at rawURL.
//
// When isImageURL reports true for rawURL, the URL is attached to the prompt
// as an image part. Otherwise, when o.followLinks is set, the page is fetched
// and its extracted text is embedded in the prompt. A failed or non-2xx fetch
// is not an error: the model is then prompted with the URL alone.
//
// It returns ("", nil) when the receiver is nil, when no API key is set, or
// when the API response contains no choices. Errors from the chat-completion
// call are returned unchanged.
func (o *OpenAI) SummarizeLink(ctx context.Context, rawURL string) (string, error) {
	if o == nil || o.apiKey == "" {
		return "", nil
	}
	cfg := openai.DefaultConfig(o.apiKey)
	if strings.TrimSpace(o.baseURL) != "" {
		cfg.BaseURL = o.baseURL
	}
	client := openai.NewClientWithConfig(cfg)

	var content, img string
	switch {
	case isImageURL(rawURL):
		img = rawURL
	case o.followLinks:
		// The fetch lives in its own function so that its timeout context is
		// cancelled and the response body closed before the (potentially
		// slow) chat-completion call starts.
		content = o.fetchLinkTextForSummary(ctx, rawURL)
	}

	// Build link-specific prompt
	sys := "You summarize the content at a single URL. Ignore surrounding chat context. Be concise and natural."
	var b strings.Builder
	b.WriteString("URL: ")
	b.WriteString(rawURL)
	b.WriteString("\n\n")
	if content != "" {
		b.WriteString("Extracted content (may be truncated):\n")
		b.WriteString(content)
		b.WriteString("\n\n")
	}
	b.WriteString("Write a short, skimmable summary of the page/video/image above. If relevant, include key takeaways and any notable cautions. Keep it under a few short paragraphs.")

	userParts := []openai.ChatMessagePart{
		{Type: openai.ChatMessagePartTypeText, Text: b.String()},
	}
	if img != "" {
		userParts = append(userParts, openai.ChatMessagePart{
			Type:     openai.ChatMessagePartTypeImageURL,
			ImageURL: &openai.ChatMessageImageURL{URL: img},
		})
	}

	model := o.model
	if strings.TrimSpace(model) == "" {
		model = "gpt-4o-mini"
	}
	// Temperature is left unset for models matched as "reasoning-like".
	reasoningLike := strings.HasPrefix(model, "gpt-5") || strings.HasPrefix(model, "o1") || strings.Contains(model, "reasoning")
	req := openai.ChatCompletionRequest{
		Model: model,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleSystem, Content: sys},
			{Role: openai.ChatMessageRoleUser, MultiContent: userParts},
		},
		MaxCompletionTokens: o.maxTokens,
	}
	if !reasoningLike {
		req.Temperature = 0.2
	}

	resp, err := client.CreateChatCompletion(ctx, req)
	if err != nil {
		return "", err
	}
	if len(resp.Choices) == 0 {
		return "", nil
	}
	return strings.TrimSpace(resp.Choices[0].Message.Content), nil
}

// fetchLinkTextForSummary downloads rawURL (bounded by o.linkTimeout and
// o.linkMaxBytes) and returns up to 6000 bytes of its text, preferring the
// readability-extracted article text over the raw body. It is best effort:
// any request failure or non-2xx status yields "".
//
// NOTE(review): rawURL looks caller-supplied and is fetched without any host
// allow/deny list — confirm whether internal addresses need to be blocked.
func (o *OpenAI) fetchLinkTextForSummary(ctx context.Context, rawURL string) string {
	fetchCtx, cancel := context.WithTimeout(ctx, o.linkTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(fetchCtx, http.MethodGet, rawURL, nil)
	if err != nil {
		return ""
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return ""
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return ""
	}

	lr := &io.LimitedReader{R: resp.Body, N: int64(o.linkMaxBytes)}
	// Read error deliberately ignored: a partial body is still usable input.
	body, _ := io.ReadAll(lr)
	text := string(body)
	if base, perr := url.Parse(rawURL); perr == nil {
		if art, rerr := readability.FromReader(strings.NewReader(text), base); rerr == nil {
			if at := strings.TrimSpace(art.TextContent); at != "" {
				text = at
			}
		}
	}
	text = strings.ReplaceAll(text, "\r", "")
	text = strings.TrimSpace(text)
	return clipToRuneBoundary(text, 6000)
}

// clipToRuneBoundary returns s cut to at most maxBytes bytes without
// splitting a multi-byte UTF-8 sequence at the cut point.
func clipToRuneBoundary(s string, maxBytes int) string {
	if maxBytes <= 0 {
		return ""
	}
	if len(s) <= maxBytes {
		return s
	}
	cut := maxBytes
	// s[cut] is the first excluded byte. While it is a UTF-8 continuation
	// byte (10xxxxxx) the cut sits inside a rune, so step back. A rune has at
	// most three continuation bytes; the bound keeps malformed input from
	// being walked back indefinitely.
	for cut > 0 && maxBytes-cut < 3 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut]
}
func linksFromImages(imgs []string) []linkSnippet { func linksFromImages(imgs []string) []linkSnippet {
out := make([]linkSnippet, 0, len(imgs)) out := make([]linkSnippet, 0, len(imgs))
for _, u := range imgs { for _, u := range imgs {

View file

@ -9,6 +9,7 @@ import (
type Summarizer interface { type Summarizer interface {
Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error)
SummarizeLink(ctx context.Context, rawURL string) (string, error)
} }