feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
package summarizer
import (
"context"
2025-08-17 19:13:18 -05:00
"encoding/json"
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
"io"
"net/http"
2025-08-15 20:41:31 -05:00
"net/url"
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
"regexp"
2025-08-15 20:41:31 -05:00
"sort"
"strconv"
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
"strings"
"time"
2025-08-15 20:41:31 -05:00
readability "github.com/go-shiori/go-readability"
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
openai "github.com/sashabaranov/go-openai"
"sojuboy/internal/config"
"sojuboy/internal/store"
)
// OpenAI summarizes IRC transcripts and individual links through an
// OpenAI-compatible chat completions API.
type OpenAI struct {
	apiKey    string // API key; the summarize methods return "" when it is empty
	baseURL   string // optional API base URL override; blank keeps the client default
	model     string // model name; blank falls back to "gpt-4o-mini"
	maxTokens int    // sent as MaxCompletionTokens on every request

	// Runtime configuration: defaults come from NewOpenAI, overrides from ApplyConfig.
	followLinks  bool          // fetch non-image links and include extracted text
	linkTimeout  time.Duration // timeout applied to each link fetch
	linkMaxBytes int           // maximum number of bytes read from a fetched page
	groupWindow  time.Duration // window for merging consecutive messages by one author
	maxLinks     int           // cap on fetched links and on attached images
	maxGroups    int           // cap on message groups sent to the model; 0 means no cap
}
// NewOpenAI builds a summarizer from the given credentials and model settings.
//
// Runtime defaults: link following enabled, 6s link timeout, 256 KiB read
// limit per link, 90s grouping window and at most 5 links. maxGroups is left
// at 0 (no cap).
func NewOpenAI(apiKey, baseURL, model string, maxTokens int) *OpenAI {
	o := new(OpenAI)

	o.apiKey = apiKey
	o.baseURL = baseURL
	o.model = model
	o.maxTokens = maxTokens

	o.followLinks = true
	o.linkTimeout = 6 * time.Second
	o.linkMaxBytes = 256 * 1024
	o.groupWindow = 90 * time.Second
	o.maxLinks = 5

	return o
}
// Configure from app config
func ( o * OpenAI ) ApplyConfig ( cfg config . Config ) {
o . followLinks = cfg . SummFollowLinks
o . linkTimeout = cfg . SummLinkTimeout
o . linkMaxBytes = cfg . SummLinkMaxBytes
o . groupWindow = cfg . SummGroupWindow
o . maxLinks = cfg . SummMaxLinks
2025-08-15 20:41:31 -05:00
o . maxGroups = cfg . SummMaxGroups
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
}
func ( o * OpenAI ) Summarize ( ctx context . Context , channel string , msgs [ ] store . Message , window time . Duration ) ( string , error ) {
if o == nil || o . apiKey == "" {
return "" , nil
}
cfg := openai . DefaultConfig ( o . apiKey )
if strings . TrimSpace ( o . baseURL ) != "" {
cfg . BaseURL = o . baseURL
}
client := openai . NewClientWithConfig ( cfg )
// 1) Group multiline posts from same author within groupWindow
grouped := groupMessages ( msgs , o . groupWindow )
2025-08-15 20:41:31 -05:00
// Apply group cap if configured (>0). 0 means no cap.
if o . maxGroups > 0 && len ( grouped ) > o . maxGroups {
grouped = grouped [ len ( grouped ) - o . maxGroups : ]
}
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
2025-08-15 20:41:31 -05:00
// 2) Extract links and optionally fetch content
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
links := extractLinks ( grouped )
2025-08-15 20:41:31 -05:00
// Split image vs non-image
var imageURLs [ ] string
var nonImageLinks [ ] linkSnippet
for _ , l := range links {
if isImageURL ( l . url ) {
imageURLs = append ( imageURLs , l . url )
} else {
nonImageLinks = append ( nonImageLinks , l )
}
}
if o . followLinks && len ( nonImageLinks ) > 0 {
nonImageLinks = fetchLinkSnippets ( ctx , nonImageLinks , o . linkTimeout , o . linkMaxBytes , o . maxLinks )
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
}
// 3) Build a concise, natural prompt
var b strings . Builder
b . WriteString ( "Channel: " )
b . WriteString ( channel )
b . WriteString ( "\nTime window: " )
b . WriteString ( window . String ( ) )
b . WriteString ( "\n\nTranscript (grouped by author):\n" )
for _ , g := range grouped {
b . WriteString ( g . time . Format ( time . RFC3339 ) )
b . WriteString ( " " )
b . WriteString ( g . author )
b . WriteString ( ": " )
b . WriteString ( g . text )
b . WriteString ( "\n" )
}
2025-08-15 20:41:31 -05:00
if len ( nonImageLinks ) > 0 {
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
b . WriteString ( "\nReferenced content (snippets):\n" )
2025-08-15 20:41:31 -05:00
for _ , ln := range nonImageLinks {
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
b . WriteString ( "- " )
b . WriteString ( ln . url )
b . WriteString ( " → " )
b . WriteString ( ln . snippet )
b . WriteString ( "\n" )
}
}
b . WriteString ( "\nWrite a concise, readable summary of the conversation above.\n" )
b . WriteString ( "- Focus on what happened and why it matters.\n" )
b . WriteString ( "- Integrate linked content and pasted multi-line posts naturally.\n" )
b . WriteString ( "- Avoid rigid sections; use short paragraphs or light bullets if helpful.\n" )
2025-09-05 06:58:38 -05:00
b . WriteString ( "- Keep it compact but don’ t omit important context.\n" )
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
prompt := b . String ( )
sys := "You summarize IRC transcripts. Be concise, natural, and informative."
model := o . model
if strings . TrimSpace ( model ) == "" {
model = "gpt-4o-mini"
}
reasoningLike := strings . HasPrefix ( model , "gpt-5" ) || strings . HasPrefix ( model , "o1" ) || strings . Contains ( model , "reasoning" )
2025-08-15 20:41:31 -05:00
// Build multimodal user message parts
userParts := [ ] openai . ChatMessagePart { { Type : openai . ChatMessagePartTypeText , Text : prompt } }
// Limit images to o.maxLinks to avoid overloading
maxImgs := o . maxLinks
if len ( imageURLs ) > maxImgs {
imageURLs = imageURLs [ : maxImgs ]
}
for _ , u := range imageURLs {
userParts = append ( userParts , openai . ChatMessagePart {
Type : openai . ChatMessagePartTypeImageURL ,
ImageURL : & openai . ChatMessageImageURL { URL : u } ,
} )
}
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
req := openai . ChatCompletionRequest {
Model : model ,
Messages : [ ] openai . ChatCompletionMessage {
{ Role : openai . ChatMessageRoleSystem , Content : sys } ,
2025-08-15 20:41:31 -05:00
{ Role : openai . ChatMessageRoleUser , MultiContent : userParts } ,
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
} ,
MaxCompletionTokens : o . maxTokens ,
}
if ! reasoningLike {
req . Temperature = 0.3
}
resp , err := client . CreateChatCompletion ( ctx , req )
if err != nil {
return "" , err
}
if len ( resp . Choices ) == 0 {
2025-08-15 20:41:31 -05:00
return localFallbackSummary ( grouped , append ( nonImageLinks , linksFromImages ( imageURLs ) ... ) ) , nil
}
out := strings . TrimSpace ( resp . Choices [ 0 ] . Message . Content )
if out == "" {
return localFallbackSummary ( grouped , append ( nonImageLinks , linksFromImages ( imageURLs ) ... ) ) , nil
}
return out , nil
}
2025-09-05 06:58:38 -05:00
// SummarizeForPush produces a digest tailored for push notifications (e.g., Pushover ~1024 chars).
// It uses a slightly more constrained prompt to encourage succinct output.
//
// Grouping, link handling and fallbacks mirror Summarize: messages are grouped
// per author and capped to the latest maxGroups groups, image links are sent
// as image parts (at most maxLinks), and other links are fetched for snippets
// when followLinks is set. It returns "" and a nil error when the receiver is
// nil or no API key is configured, the API error when the request fails, and a
// locally built fallback summary when the model returns no usable text.
func (o *OpenAI) SummarizeForPush(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) {
	if o == nil || o.apiKey == "" {
		return "", nil
	}
	cfg := openai.DefaultConfig(o.apiKey)
	if strings.TrimSpace(o.baseURL) != "" {
		cfg.BaseURL = o.baseURL
	}
	client := openai.NewClientWithConfig(cfg)

	// A negative cap would make the slice expressions below (and the one in
	// fetchLinkSnippets) panic, so treat it as "no links".
	maxLinks := o.maxLinks
	if maxLinks < 0 {
		maxLinks = 0
	}

	grouped := groupMessages(msgs, o.groupWindow)
	// Keep only the most recent groups when a cap is configured (0 = no cap).
	if o.maxGroups > 0 && len(grouped) > o.maxGroups {
		grouped = grouped[len(grouped)-o.maxGroups:]
	}

	var imageURLs []string
	var nonImageLinks []linkSnippet
	for _, l := range extractLinks(grouped) {
		if isImageURL(l.url) {
			imageURLs = append(imageURLs, l.url)
		} else {
			nonImageLinks = append(nonImageLinks, l)
		}
	}
	if o.followLinks && len(nonImageLinks) > 0 {
		nonImageLinks = fetchLinkSnippets(ctx, nonImageLinks, o.linkTimeout, o.linkMaxBytes, maxLinks)
	}

	var b strings.Builder
	b.WriteString("Channel: ")
	b.WriteString(channel)
	b.WriteString("\nTime window: ")
	b.WriteString(window.String())
	b.WriteString("\n\nTranscript (grouped by author):\n")
	for _, g := range grouped {
		b.WriteString(g.time.Format(time.RFC3339))
		b.WriteString(" ")
		b.WriteString(g.author)
		b.WriteString(": ")
		b.WriteString(g.text)
		b.WriteString("\n")
	}
	if len(nonImageLinks) > 0 {
		b.WriteString("\nReferenced content (snippets):\n")
		for _, ln := range nonImageLinks {
			b.WriteString("- ")
			b.WriteString(ln.url)
			b.WriteString(" → ")
			b.WriteString(ln.snippet)
			b.WriteString("\n")
		}
	}
	b.WriteString("\nWrite a concise, readable summary of the conversation above.\n")
	b.WriteString("- Focus on what happened and why it matters.\n")
	b.WriteString("- Integrate linked content and pasted multi-line posts naturally.\n")
	b.WriteString("- Avoid rigid sections; use short paragraphs or light bullets if helpful.\n")
	b.WriteString("- Keep it compact but don’t omit important context.\n")
	b.WriteString("- Keep the final output under ~900 characters suitable for a single push notification.\n")
	prompt := b.String()

	sys := "You summarize IRC transcripts for a push notification. Be concise, natural, and informative."

	model := o.model
	if strings.TrimSpace(model) == "" {
		model = "gpt-4o-mini"
	}
	// Temperature is only set for models not matched by this check.
	reasoningLike := strings.HasPrefix(model, "gpt-5") || strings.HasPrefix(model, "o1") || strings.Contains(model, "reasoning")

	// Cap attached images at maxLinks, as Summarize does, so a link-heavy
	// channel cannot bloat the request.
	if len(imageURLs) > maxLinks {
		imageURLs = imageURLs[:maxLinks]
	}
	userParts := make([]openai.ChatMessagePart, 0, 1+len(imageURLs))
	userParts = append(userParts, openai.ChatMessagePart{Type: openai.ChatMessagePartTypeText, Text: prompt})
	for _, u := range imageURLs {
		userParts = append(userParts, openai.ChatMessagePart{
			Type:     openai.ChatMessagePartTypeImageURL,
			ImageURL: &openai.ChatMessageImageURL{URL: u},
		})
	}

	req := openai.ChatCompletionRequest{
		Model: model,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleSystem, Content: sys},
			{Role: openai.ChatMessageRoleUser, MultiContent: userParts},
		},
		MaxCompletionTokens: o.maxTokens,
	}
	if !reasoningLike {
		req.Temperature = 0.3
	}

	// fallback builds the local summary used when the model gives nothing back.
	fallback := func() string {
		return localFallbackSummary(grouped, append(nonImageLinks, linksFromImages(imageURLs)...))
	}

	resp, err := client.CreateChatCompletion(ctx, req)
	if err != nil {
		return "", err
	}
	if len(resp.Choices) == 0 {
		return fallback(), nil
	}
	out := strings.TrimSpace(resp.Choices[0].Message.Content)
	if out == "" {
		return fallback(), nil
	}
	return out, nil
}
2025-08-17 18:52:39 -05:00
func ( o * OpenAI ) SummarizeLink ( ctx context . Context , rawURL string ) ( string , error ) {
if o == nil || o . apiKey == "" {
return "" , nil
}
cfg := openai . DefaultConfig ( o . apiKey )
if strings . TrimSpace ( o . baseURL ) != "" {
cfg . BaseURL = o . baseURL
}
client := openai . NewClientWithConfig ( cfg )
content := ""
2025-08-17 19:13:18 -05:00
title := ""
2025-08-17 18:52:39 -05:00
img := ""
2025-08-17 19:13:18 -05:00
lu , _ := url . Parse ( rawURL )
host := strings . ToLower ( lu . Host )
isYouTube := host == "www.youtube.com" || host == "youtube.com" || host == "m.youtube.com" || host == "youtu.be"
2025-08-17 20:29:50 -05:00
ua := "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36"
accept := "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
2025-08-17 18:52:39 -05:00
if isImageURL ( rawURL ) {
img = rawURL
2025-08-17 19:13:18 -05:00
} else if isYouTube {
// YouTube: try oEmbed for title + thumbnail
watchURL := rawURL
if host == "youtu.be" {
id := strings . TrimPrefix ( lu . Path , "/" )
watchURL = "https://www.youtube.com/watch?v=" + id
}
ctx2 , cancel := context . WithTimeout ( ctx , o . linkTimeout )
defer cancel ( )
oembed := "https://www.youtube.com/oembed?format=json&url=" + url . QueryEscape ( watchURL )
req , _ := http . NewRequestWithContext ( ctx2 , http . MethodGet , oembed , nil )
2025-08-17 20:29:50 -05:00
req . Header . Set ( "User-Agent" , ua )
req . Header . Set ( "Accept" , accept )
2025-08-17 19:13:18 -05:00
if resp , err := http . DefaultClient . Do ( req ) ; err == nil {
2025-08-17 19:27:36 -05:00
func ( ) {
2025-08-17 19:13:18 -05:00
defer resp . Body . Close ( )
if resp . StatusCode >= 200 && resp . StatusCode < 300 {
2025-08-17 19:27:36 -05:00
var oem struct {
2025-08-17 19:13:18 -05:00
Title string ` json:"title" `
Thumb string ` json:"thumbnail_url" `
}
if err := json . NewDecoder ( resp . Body ) . Decode ( & oem ) ; err == nil {
2025-08-17 19:27:36 -05:00
if oem . Title != "" {
title = oem . Title
}
if oem . Thumb != "" {
img = oem . Thumb
}
2025-08-17 19:13:18 -05:00
}
}
} ( )
}
// No robust transcript grab here; rely on model generalization + title
2025-08-17 18:52:39 -05:00
} else if o . followLinks {
ctx2 , cancel := context . WithTimeout ( ctx , o . linkTimeout )
defer cancel ( )
req , err := http . NewRequestWithContext ( ctx2 , http . MethodGet , rawURL , nil )
if err == nil {
2025-08-17 20:29:50 -05:00
req . Header . Set ( "User-Agent" , ua )
req . Header . Set ( "Accept" , accept )
req . Header . Set ( "Accept-Language" , "en-US,en;q=0.9" )
2025-08-17 18:52:39 -05:00
resp , err := http . DefaultClient . Do ( req )
if err == nil {
func ( ) {
defer resp . Body . Close ( )
if resp . StatusCode >= 200 && resp . StatusCode < 300 {
lr := & io . LimitedReader { R : resp . Body , N : int64 ( o . linkMaxBytes ) }
b , _ := io . ReadAll ( lr )
text := string ( b )
if base , perr := url . Parse ( rawURL ) ; perr == nil {
if art , err := readability . FromReader ( strings . NewReader ( text ) , base ) ; err == nil {
if at := strings . TrimSpace ( art . TextContent ) ; at != "" {
text = at
2025-08-17 19:27:36 -05:00
if title == "" && strings . TrimSpace ( art . Title ) != "" {
title = strings . TrimSpace ( art . Title )
}
2025-08-17 18:52:39 -05:00
}
}
}
text = strings . ReplaceAll ( text , "\r" , "" )
text = strings . TrimSpace ( text )
2025-08-17 19:27:36 -05:00
if len ( text ) > 6000 {
text = text [ : 6000 ]
}
2025-08-17 18:52:39 -05:00
content = text
}
} ( )
}
}
}
// Build link-specific prompt
2025-08-17 20:29:50 -05:00
sys := "You summarize the content at a single URL. You are given extracted text, title, or image/thumbnail. If the extract is limited, infer the best short summary from what’ s available. Do not say you can’ t open links or ask for more text; if there’ s truly nothing usable, return '(no summary)'. Be concise and natural."
2025-08-17 18:52:39 -05:00
var userParts [ ] openai . ChatMessagePart
b := strings . Builder { }
b . WriteString ( "URL: " )
b . WriteString ( rawURL )
2025-08-17 19:13:18 -05:00
b . WriteString ( "\n" )
2025-08-17 19:27:36 -05:00
if title != "" {
b . WriteString ( "Title: " )
b . WriteString ( title )
b . WriteString ( "\n" )
}
2025-08-17 19:13:18 -05:00
b . WriteString ( "\n" )
2025-08-17 18:52:39 -05:00
if content != "" {
b . WriteString ( "Extracted content (may be truncated):\n" )
b . WriteString ( content )
b . WriteString ( "\n\n" )
}
2025-09-05 06:58:38 -05:00
b . WriteString ( "Write a short, skimmable summary of the page/video/image above. If relevant, include key takeaways and any notable cautions. Keep it under a few short paragraphs." )
2025-08-17 18:52:39 -05:00
userParts = append ( userParts , openai . ChatMessagePart { Type : openai . ChatMessagePartTypeText , Text : b . String ( ) } )
if img != "" {
userParts = append ( userParts , openai . ChatMessagePart { Type : openai . ChatMessagePartTypeImageURL , ImageURL : & openai . ChatMessageImageURL { URL : img } } )
}
model := o . model
if strings . TrimSpace ( model ) == "" {
model = "gpt-4o-mini"
}
reasoningLike := strings . HasPrefix ( model , "gpt-5" ) || strings . HasPrefix ( model , "o1" ) || strings . Contains ( model , "reasoning" )
req := openai . ChatCompletionRequest {
Model : model ,
Messages : [ ] openai . ChatCompletionMessage {
{ Role : openai . ChatMessageRoleSystem , Content : sys } ,
{ Role : openai . ChatMessageRoleUser , MultiContent : userParts } ,
} ,
MaxCompletionTokens : o . maxTokens ,
}
2025-08-17 20:37:27 -05:00
if ! reasoningLike {
req . Temperature = 0.2
}
2025-08-17 18:52:39 -05:00
resp , err := client . CreateChatCompletion ( ctx , req )
2025-08-17 20:37:27 -05:00
if err != nil {
return "" , err
}
if len ( resp . Choices ) == 0 {
return "" , nil
}
2025-08-17 18:52:39 -05:00
return strings . TrimSpace ( resp . Choices [ 0 ] . Message . Content ) , nil
}
2025-08-15 20:41:31 -05:00
// linksFromImages wraps each image URL in a linkSnippet with an empty snippet,
// preserving order. The result is always non-nil.
func linksFromImages(imgs []string) []linkSnippet {
	wrapped := make([]linkSnippet, len(imgs))
	for i := range imgs {
		wrapped[i].url = imgs[i]
	}
	return wrapped
}
// isImageURL reports whether u looks like a direct image link, judged by a
// case-insensitive .jpg, .jpeg, .png, .gif or .webp extension.
//
// Anything from the first '?' or '#' onward is ignored, so links such as
// "https://host/pic.png?w=500" are recognised, while an extension that only
// appears inside the query string ("https://host/page?src=a.png") is not.
func isImageURL(u string) bool {
	lu := strings.ToLower(u)
	if i := strings.IndexAny(lu, "?#"); i >= 0 {
		lu = lu[:i]
	}
	for _, ext := range []string{".jpg", ".jpeg", ".png", ".gif", ".webp"} {
		if strings.HasSuffix(lu, ext) {
			return true
		}
	}
	return false
}
// linkSnippet pairs a URL found in the transcript with the text extracted
// from it. snippet stays empty when the link was not, or could not be, fetched.
type linkSnippet struct {
	// url is the link exactly as it is passed on to prompts and fetchers.
	url string
	// snippet is the extracted page text for url, if any.
	snippet string
}
// groupedMsg is one transcript entry after grouping: consecutive messages by
// the same author merged into a single newline-joined text.
type groupedMsg struct {
	// time is the timestamp of the first message in the group.
	time time.Time
	// author is the author shared by every message in the group.
	author string
	// text holds the message bodies joined with "\n".
	text string
}
// groupMessages merges runs of consecutive messages by the same author into
// single entries. A message joins the current group when its author matches
// and it was sent no later than window after the group's first message; its
// body is appended on a new line. Input order is preserved and an empty input
// yields nil.
func groupMessages(msgs []store.Message, window time.Duration) []groupedMsg {
	var groups []groupedMsg
	for _, msg := range msgs {
		if n := len(groups); n > 0 {
			last := &groups[n-1]
			if last.author == msg.Author && msg.Time.Sub(last.time) <= window {
				last.text += "\n" + msg.Body
				continue
			}
		}
		groups = append(groups, groupedMsg{
			time:   msg.Time,
			author: msg.Author,
			text:   msg.Body,
		})
	}
	return groups
}
// linkRe matches http:// and https:// URLs up to the next whitespace
// character. The pattern carries no surrounding spaces, so URLs at the very
// start or end of a message match and matches contain only the URL itself.
var linkRe = regexp.MustCompile(`https?://\S+`)
func extractLinks ( msgs [ ] groupedMsg ) [ ] linkSnippet {
var links [ ] linkSnippet
for _ , g := range msgs {
for _ , m := range linkRe . FindAllString ( g . text , - 1 ) {
links = append ( links , linkSnippet { url : m } )
}
}
2025-08-15 20:41:31 -05:00
// de-dup
saw := make ( map [ string ] bool )
dedup := make ( [ ] linkSnippet , 0 , len ( links ) )
for _ , l := range links {
if ! saw [ l . url ] {
saw [ l . url ] = true
dedup = append ( dedup , l )
}
}
return dedup
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
}
func fetchLinkSnippets ( ctx context . Context , links [ ] linkSnippet , timeout time . Duration , maxBytes int , maxLinks int ) [ ] linkSnippet {
client := & http . Client { Timeout : timeout }
if len ( links ) > maxLinks {
links = links [ : maxLinks ]
}
out := make ( [ ] linkSnippet , 0 , len ( links ) )
for _ , ln := range links {
req , err := http . NewRequestWithContext ( ctx , http . MethodGet , ln . url , nil )
if err != nil {
continue
}
resp , err := client . Do ( req )
if err != nil {
continue
}
func ( ) {
defer resp . Body . Close ( )
if resp . StatusCode < 200 || resp . StatusCode >= 300 {
return
}
limited := io . LimitedReader { R : resp . Body , N : int64 ( maxBytes ) }
b , err := io . ReadAll ( & limited )
if err != nil || len ( b ) == 0 {
return
}
text := string ( b )
2025-08-15 20:41:31 -05:00
// Try readability for cleaner article text
if baseURL , perr := url . Parse ( ln . url ) ; perr == nil {
if art , err := readability . FromReader ( strings . NewReader ( text ) , baseURL ) ; err == nil {
if at := strings . TrimSpace ( art . TextContent ) ; at != "" {
text = at
}
}
}
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
text = strings . ReplaceAll ( text , "\r" , "" )
text = strings . TrimSpace ( text )
2025-08-15 20:41:31 -05:00
if len ( text ) > 2000 {
text = text [ : 2000 ]
feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
2025-08-15 18:06:28 -05:00
}
out = append ( out , linkSnippet { url : ln . url , snippet : text } )
} ( )
}
return out
}
2025-08-15 20:41:31 -05:00
// localFallbackSummary builds a plain-text summary without calling the model.
// It is used when the API returns no usable text.
//
// The output has a header line, the number of groups and distinct authors, a
// comma-separated list of link URLs (when links is non-empty) and the last
// five groups as bullet lines, each truncated to roughly 200 bytes. It returns
// "" when grouped is empty.
func localFallbackSummary(grouped []groupedMsg, links []linkSnippet) string {
	if len(grouped) == 0 {
		return ""
	}
	// simple counts
	authors := map[string]int{}
	for _, g := range grouped {
		authors[g.author]++
	}
	// NOTE(review): authorList is computed and sorted but never emitted. It is
	// kept because it is the only use of the sort package visible in this
	// file; either print it or drop it together with the import.
	authorList := make([]string, 0, len(authors))
	for a := range authors {
		authorList = append(authorList, a)
	}
	sort.Strings(authorList)

	var b strings.Builder
	b.WriteString("Summary (fallback)\n")
	b.WriteString("- Messages: ")
	b.WriteString(strconvI(len(grouped)))
	b.WriteString(" groups by ")
	b.WriteString(strconvI(len(authors)))
	b.WriteString(" authors\n")
	if len(links) > 0 {
		b.WriteString("- Links: ")
		for i, l := range links {
			if i > 0 {
				b.WriteString(", ")
			}
			b.WriteString(l.url)
		}
		b.WriteString("\n")
	}
	// include last few grouped lines as a teaser
	tail := grouped
	if len(tail) > 5 {
		tail = tail[len(tail)-5:]
	}
	const maxLine = 200
	for _, g := range tail {
		b.WriteString("• ")
		b.WriteString(g.author)
		b.WriteString(": ")
		line := g.text
		if len(line) > maxLine {
			// Back up to a rune boundary so the user-visible text never ends
			// in half of a multi-byte UTF-8 character.
			cut := maxLine
			for i := 0; i < 3 && cut > 0 && line[cut]&0xC0 == 0x80; i++ {
				cut--
			}
			line = line[:cut] + "…"
		}
		b.WriteString(line)
		b.WriteString("\n")
	}
	return strings.TrimSpace(b.String())
}
// strconvI formats n as a base-10 string.
func strconvI(n int) string {
	return strconv.FormatInt(int64(n), 10)
}