feat: initial Beta 1 release
- soju raw connector with event playback and CHATHISTORY fallback
- SQLite store with msgid de-dup and retention job
- Mentions + Pushover + tuning; structured JSON logs
- Summaries: concise, link-following, multi-line grouping
- HTTP: /healthz, /ready, /tail, /trigger, /metrics
- Docker: distroless, healthcheck, version metadata
- Docs: README, CHANGELOG, compose
This commit is contained in:
commit
2954e85e7a
19 changed files with 1983 additions and 0 deletions
204
internal/summarizer/openai.go
Normal file
204
internal/summarizer/openai.go
Normal file
|
|
@ -0,0 +1,204 @@
|
|||
package summarizer
|
||||
|
||||
import (
	"context"
	"fmt"
	"io"
	"net/http"
	"regexp"
	"strings"
	"time"
	"unicode/utf8"

	openai "github.com/sashabaranov/go-openai"

	"sojuboy/internal/config"
	"sojuboy/internal/store"
)
|
||||
|
||||
// OpenAI summarizes IRC transcripts by calling an OpenAI-compatible
// chat-completions API. The zero value is unusable; construct with NewOpenAI
// and optionally tune via ApplyConfig.
type OpenAI struct {
	apiKey    string // API key; Summarize is a silent no-op when empty
	baseURL   string // optional endpoint override; empty means the library default
	model     string // chat model name; Summarize falls back to "gpt-4o-mini" when blank
	maxTokens int    // cap on completion tokens per request
	// runtime cfg
	followLinks  bool          // when true, fetch content snippets for URLs in the transcript
	linkTimeout  time.Duration // per-request HTTP timeout when fetching links
	linkMaxBytes int           // max bytes read from each fetched link body
	groupWindow  time.Duration // window for merging consecutive same-author messages
	maxLinks     int           // max number of links fetched per summary
}
|
||||
|
||||
func NewOpenAI(apiKey, baseURL, model string, maxTokens int) *OpenAI {
|
||||
return &OpenAI{apiKey: apiKey, baseURL: baseURL, model: model, maxTokens: maxTokens,
|
||||
followLinks: true, linkTimeout: 6 * time.Second, linkMaxBytes: 262144, groupWindow: 90 * time.Second, maxLinks: 5,
|
||||
}
|
||||
}
|
||||
|
||||
// Configure from app config
|
||||
func (o *OpenAI) ApplyConfig(cfg config.Config) {
|
||||
o.followLinks = cfg.SummFollowLinks
|
||||
o.linkTimeout = cfg.SummLinkTimeout
|
||||
o.linkMaxBytes = cfg.SummLinkMaxBytes
|
||||
o.groupWindow = cfg.SummGroupWindow
|
||||
o.maxLinks = cfg.SummMaxLinks
|
||||
}
|
||||
|
||||
func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) {
|
||||
if o == nil || o.apiKey == "" {
|
||||
return "", nil
|
||||
}
|
||||
cfg := openai.DefaultConfig(o.apiKey)
|
||||
if strings.TrimSpace(o.baseURL) != "" {
|
||||
cfg.BaseURL = o.baseURL
|
||||
}
|
||||
client := openai.NewClientWithConfig(cfg)
|
||||
|
||||
// 1) Group multiline posts from same author within groupWindow
|
||||
grouped := groupMessages(msgs, o.groupWindow)
|
||||
|
||||
// 2) Extract links and optionally fetch a small amount of content
|
||||
links := extractLinks(grouped)
|
||||
if o.followLinks && len(links) > 0 {
|
||||
links = fetchLinkSnippets(ctx, links, o.linkTimeout, o.linkMaxBytes, o.maxLinks)
|
||||
}
|
||||
|
||||
// 3) Build a concise, natural prompt
|
||||
var b strings.Builder
|
||||
b.WriteString("Channel: ")
|
||||
b.WriteString(channel)
|
||||
b.WriteString("\nTime window: ")
|
||||
b.WriteString(window.String())
|
||||
b.WriteString("\n\nTranscript (grouped by author):\n")
|
||||
for _, g := range grouped {
|
||||
b.WriteString(g.time.Format(time.RFC3339))
|
||||
b.WriteString(" ")
|
||||
b.WriteString(g.author)
|
||||
b.WriteString(": ")
|
||||
b.WriteString(g.text)
|
||||
b.WriteString("\n")
|
||||
}
|
||||
if len(links) > 0 {
|
||||
b.WriteString("\nReferenced content (snippets):\n")
|
||||
for _, ln := range links {
|
||||
b.WriteString("- ")
|
||||
b.WriteString(ln.url)
|
||||
b.WriteString(" → ")
|
||||
b.WriteString(ln.snippet)
|
||||
b.WriteString("\n")
|
||||
}
|
||||
}
|
||||
b.WriteString("\nWrite a concise, readable summary of the conversation above.\n")
|
||||
b.WriteString("- Focus on what happened and why it matters.\n")
|
||||
b.WriteString("- Integrate linked content and pasted multi-line posts naturally.\n")
|
||||
b.WriteString("- Avoid rigid sections; use short paragraphs or light bullets if helpful.\n")
|
||||
b.WriteString("- Keep it compact but don’t omit important context.\n")
|
||||
prompt := b.String()
|
||||
|
||||
sys := "You summarize IRC transcripts. Be concise, natural, and informative."
|
||||
|
||||
model := o.model
|
||||
if strings.TrimSpace(model) == "" {
|
||||
model = "gpt-4o-mini"
|
||||
}
|
||||
reasoningLike := strings.HasPrefix(model, "gpt-5") || strings.HasPrefix(model, "o1") || strings.Contains(model, "reasoning")
|
||||
|
||||
req := openai.ChatCompletionRequest{
|
||||
Model: model,
|
||||
Messages: []openai.ChatCompletionMessage{
|
||||
{Role: openai.ChatMessageRoleSystem, Content: sys},
|
||||
{Role: openai.ChatMessageRoleUser, Content: prompt},
|
||||
},
|
||||
MaxCompletionTokens: o.maxTokens,
|
||||
}
|
||||
if !reasoningLike {
|
||||
req.Temperature = 0.3
|
||||
}
|
||||
|
||||
resp, err := client.CreateChatCompletion(ctx, req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(resp.Choices) == 0 {
|
||||
return "", nil
|
||||
}
|
||||
return strings.TrimSpace(resp.Choices[0].Message.Content), nil
|
||||
}
|
||||
|
||||
// linkSnippet pairs a URL found in the transcript with an optional excerpt of
// the fetched page content (empty until fetchLinkSnippets fills it in).
type linkSnippet struct {
	url     string // the extracted URL, as matched in the message text
	snippet string // short excerpt of the fetched body; may be empty
}
|
||||
|
||||
// groupedMsg is one transcript entry after grouping: consecutive messages
// from the same author (see groupMessages) merged into one multi-line text.
type groupedMsg struct {
	time   time.Time // timestamp of the first message in the group
	author string    // message author
	text   string    // message body; grouped lines joined with "\n"
}
|
||||
|
||||
func groupMessages(msgs []store.Message, window time.Duration) []groupedMsg {
|
||||
if len(msgs) == 0 {
|
||||
return nil
|
||||
}
|
||||
var out []groupedMsg
|
||||
cur := groupedMsg{time: msgs[0].Time, author: msgs[0].Author, text: msgs[0].Body}
|
||||
for i := 1; i < len(msgs); i++ {
|
||||
m := msgs[i]
|
||||
if m.Author == cur.author && m.Time.Sub(cur.time) <= window {
|
||||
cur.text += "\n" + m.Body
|
||||
continue
|
||||
}
|
||||
out = append(out, cur)
|
||||
cur = groupedMsg{time: m.Time, author: m.Author, text: m.Body}
|
||||
}
|
||||
out = append(out, cur)
|
||||
return out
|
||||
}
|
||||
|
||||
var linkRe = regexp.MustCompile(`https?://\S+`)
|
||||
|
||||
func extractLinks(msgs []groupedMsg) []linkSnippet {
|
||||
var links []linkSnippet
|
||||
for _, g := range msgs {
|
||||
for _, m := range linkRe.FindAllString(g.text, -1) {
|
||||
links = append(links, linkSnippet{url: m})
|
||||
}
|
||||
}
|
||||
return links
|
||||
}
|
||||
|
||||
func fetchLinkSnippets(ctx context.Context, links []linkSnippet, timeout time.Duration, maxBytes int, maxLinks int) []linkSnippet {
|
||||
client := &http.Client{Timeout: timeout}
|
||||
if len(links) > maxLinks {
|
||||
links = links[:maxLinks]
|
||||
}
|
||||
out := make([]linkSnippet, 0, len(links))
|
||||
for _, ln := range links {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, ln.url, nil)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
func() {
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return
|
||||
}
|
||||
limited := io.LimitedReader{R: resp.Body, N: int64(maxBytes)}
|
||||
b, err := io.ReadAll(&limited)
|
||||
if err != nil || len(b) == 0 {
|
||||
return
|
||||
}
|
||||
// naive text cleanup
|
||||
text := string(b)
|
||||
text = strings.ReplaceAll(text, "\r", "")
|
||||
text = strings.TrimSpace(text)
|
||||
if len(text) > 800 {
|
||||
text = text[:800]
|
||||
}
|
||||
out = append(out, linkSnippet{url: ln.url, snippet: text})
|
||||
}()
|
||||
}
|
||||
return out
|
||||
}
|
||||
14
internal/summarizer/summarizer.go
Normal file
14
internal/summarizer/summarizer.go
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
package summarizer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"sojuboy/internal/store"
|
||||
)
|
||||
|
||||
// Summarizer produces a human-readable summary of a channel's recent
// messages.
type Summarizer interface {
	// Summarize returns a summary of msgs from channel covering the given
	// time window. Implementations may return ("", nil) when summarization
	// is disabled or there is nothing to report (the OpenAI implementation
	// does both).
	Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error)
}
|
||||
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue