docs: expand .env example to show max/large values; add SUMM_TIMEOUT and summarizer tunables\n\nfeat: summarizer improvements\n- readability extraction for articles\n- image links passed to model as vision inputs\n- configurable max groups/links/bytes and timeout\n- higher default ceilings; resilient fallback summary
This commit is contained in:
parent
2954e85e7a
commit
9ecf4f4f4c
7 changed files with 296 additions and 53 deletions
43
README.md
43
README.md
|
|
@ -31,14 +31,14 @@ Runtime modules:
|
||||||
- `internal/notifier`: Pushover notifier (pluggable interface)
|
- `internal/notifier`: Pushover notifier (pluggable interface)
|
||||||
- `internal/summarizer`: OpenAI client with GPT-5 defaults, GPT-4o-mini fallback
|
- `internal/summarizer`: OpenAI client with GPT-5 defaults, GPT-4o-mini fallback
|
||||||
- `internal/scheduler`: cron-based digest scheduling and daily retention job
|
- `internal/scheduler`: cron-based digest scheduling and daily retention job
|
||||||
- `internal/httpapi`: `/healthz`, `/tail`, `/trigger`, `/metrics`
|
- `internal/httpapi`: `/healthz`, `/ready`, `/tail`, `/trigger`, `/metrics`
|
||||||
- `internal/config`: env config loader and helpers
|
- `internal/config`: env config loader and helpers
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- Mention/keyword detection: punctuation-tolerant (letters, digits, `_` and `-` are word chars)
|
- Mention/keyword detection: punctuation-tolerant (letters, digits, `_` and `-` are word chars)
|
||||||
- Mention tuning: allow/deny channels, urgent keywords bypass quiet hours, rate limiting
|
- Mention tuning: allow/deny channels, urgent keywords bypass quiet hours, rate limiting
|
||||||
- AI digest generation: concise natural summaries (no rigid sections); integrates pasted multi-line posts and referenced link context
|
- AI digest generation: concise natural summaries (no rigid sections); integrates pasted multi-line posts and referenced link context; image links sent to GPT‑5 as vision inputs
|
||||||
- Configurable schedules (cron), quiet hours, and summary parameters
|
- Configurable schedules (cron), quiet hours, and summary parameters
|
||||||
- Local persistence with retention pruning (daily at 03:00)
|
- Local persistence with retention pruning (daily at 03:00)
|
||||||
- HTTP endpoints: health, tail, metrics, on-demand digests
|
- HTTP endpoints: health, tail, metrics, on-demand digests
|
||||||
|
|
@ -63,9 +63,9 @@ Runtime modules:
|
||||||
- Debug logs include: mention delivered or suppression reason (backfill, quiet hours, rate limit)
|
- Debug logs include: mention delivered or suppression reason (backfill, quiet hours, rate limit)
|
||||||
|
|
||||||
5) Summarization:
|
5) Summarization:
|
||||||
- `/trigger` or the scheduler loads a window and calls OpenAI (with a 60s timeout)
|
- `/trigger` or the scheduler loads a window and calls OpenAI
|
||||||
- Defaults to `OPENAI_MODEL=gpt-5` with `MaxCompletionTokens`; temperature omitted for reasoning-like models
|
- GPT‑5 context: ~272k input tokens + up to 128k output tokens (400k total)
|
||||||
- Tunables let you follow link targets and group multi-line posts (see env below)
|
- Summaries are concise/natural and integrate multi-line posts, article text (readability-extracted), and image links (vision)
|
||||||
|
|
||||||
6) HTTP API:
|
6) HTTP API:
|
||||||
- `/healthz` → `200 ok`
|
- `/healthz` → `200 ok`
|
||||||
|
|
@ -83,7 +83,7 @@ Runtime modules:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["/sojuboy", "--health"]
|
test: ["CMD", "/sojuboy", "--health"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 3s
|
timeout: 3s
|
||||||
retries: 3
|
retries: 3
|
||||||
|
|
@ -146,6 +146,8 @@ Compose includes a healthcheck calling the binary’s `--health` flag, which ret
|
||||||
|
|
||||||
## Configuration (.env example)
|
## Configuration (.env example)
|
||||||
|
|
||||||
|
The example below shows maximum or large-but-reasonable values. Defaults are noted where they are also the maximum, or where otherwise relevant.
|
||||||
|
|
||||||
```env
|
```env
|
||||||
# soju / IRC
|
# soju / IRC
|
||||||
SOJU_HOST=bnc.example.org
|
SOJU_HOST=bnc.example.org
|
||||||
|
|
@ -176,22 +178,25 @@ LLM_PROVIDER=openai
|
||||||
OPENAI_API_KEY=sk-...
|
OPENAI_API_KEY=sk-...
|
||||||
OPENAI_BASE_URL=https://api.openai.com/v1
|
OPENAI_BASE_URL=https://api.openai.com/v1
|
||||||
OPENAI_MODEL=gpt-5
|
OPENAI_MODEL=gpt-5
|
||||||
OPENAI_MAX_TOKENS=700
|
# GPT‑5 allows up to ~128k completion (output) tokens (model limit). Default: 700.
|
||||||
|
OPENAI_MAX_TOKENS=128000
|
||||||
# Summarizer tuning
|
# Summarizer tuning
|
||||||
SUMM_FOLLOW_LINKS=true # fetch small snippets from referenced links
|
SUMM_FOLLOW_LINKS=true # default true
|
||||||
SUMM_LINK_TIMEOUT=6s # HTTP timeout per link
|
SUMM_LINK_TIMEOUT=20s # no hard max; example large
|
||||||
SUMM_LINK_MAX_BYTES=262144 # max bytes fetched per link
|
SUMM_LINK_MAX_BYTES=1048576 # no hard max; example large (1 MiB/article)
|
||||||
SUMM_GROUP_WINDOW=90s # group multi-line posts within this window
|
SUMM_GROUP_WINDOW=120s # no hard max; example large grouping window
|
||||||
SUMM_MAX_LINKS=5 # limit links fetched per summary
|
SUMM_MAX_LINKS=20 # no strict max; example large
|
||||||
|
SUMM_MAX_GROUPS=20000 # 0=no cap; example large
|
||||||
|
SUMM_TIMEOUT=10m # request timeout; default 5m
|
||||||
|
|
||||||
# Digests
|
# Digests
|
||||||
DIGEST_CRON=0 */6 * * *
|
DIGEST_CRON=0 */6 * * * # every 6 hours
|
||||||
DIGEST_WINDOW=6h
|
DIGEST_WINDOW=24h # no hard max; example large window
|
||||||
QUIET_HOURS=
|
QUIET_HOURS= # e.g., 22:00-07:00
|
||||||
|
|
||||||
# Mentions/alerts
|
# Mentions/alerts
|
||||||
NOTIFY_BACKFILL=false # if true, notify even for replayed (older) messages
|
NOTIFY_BACKFILL=false # default false
|
||||||
MENTION_MIN_INTERVAL=30s # min interval between alerts per channel/keyword
|
MENTION_MIN_INTERVAL=30s # no hard max; rate-limit between alerts
|
||||||
MENTIONS_ONLY_CHANNELS= # optional allow-list (CSV)
|
MENTIONS_ONLY_CHANNELS= # optional allow-list (CSV)
|
||||||
MENTIONS_DENY_CHANNELS= # optional deny-list (CSV)
|
MENTIONS_DENY_CHANNELS= # optional deny-list (CSV)
|
||||||
URGENT_KEYWORDS=urgent,priority # bypass quiet hours
|
URGENT_KEYWORDS=urgent,priority # bypass quiet hours
|
||||||
|
|
@ -202,7 +207,7 @@ HTTP_TOKEN=put-a-long-random-token-here
|
||||||
|
|
||||||
# Storage
|
# Storage
|
||||||
STORE_PATH=/data/app.db
|
STORE_PATH=/data/app.db
|
||||||
STORE_RETENTION_DAYS=7
|
STORE_RETENTION_DAYS=365 # example large retention
|
||||||
|
|
||||||
# Logging
|
# Logging
|
||||||
LOG_LEVEL=info
|
LOG_LEVEL=info
|
||||||
|
|
@ -220,7 +225,7 @@ LOG_LEVEL=info
|
||||||
- Set `OPENAI_API_KEY`
|
- Set `OPENAI_API_KEY`
|
||||||
- Set `OPENAI_BASE_URL` to exactly `https://api.openai.com/v1`
|
- Set `OPENAI_BASE_URL` to exactly `https://api.openai.com/v1`
|
||||||
- If `gpt-5` isn’t available on your account, use a supported model like `gpt-4o-mini`
|
- If `gpt-5` isn’t available on your account, use a supported model like `gpt-4o-mini`
|
||||||
- GPT-5 beta limitations: temperature fixed; use `MaxCompletionTokens`
|
- GPT‑5 limits: ~272k input + 128k output tokens (400k context)
|
||||||
|
|
||||||
## HTTP API
|
## HTTP API
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -108,16 +108,15 @@ func main() {
|
||||||
|
|
||||||
// HTTP API
|
// HTTP API
|
||||||
api := httpapi.Server{
|
api := httpapi.Server{
|
||||||
ListenAddr: cfg.HTTPListen,
|
ListenAddr: cfg.HTTPListen,
|
||||||
AuthToken: cfg.HTTPToken,
|
AuthToken: cfg.HTTPToken,
|
||||||
Store: st,
|
Store: st,
|
||||||
Summarizer: sum,
|
Summarizer: sum,
|
||||||
Notifier: nt,
|
Notifier: nt,
|
||||||
Logger: slog.New(slog.NewJSONHandler(os.Stdout, nil)), // legacy interface still expects *log.Logger; keep minimal text via adapter if needed
|
Logger: slog.New(slog.NewJSONHandler(os.Stdout, nil)),
|
||||||
Metrics: metrics,
|
Metrics: metrics,
|
||||||
Ready: func() bool {
|
Ready: func() bool { return atomic.LoadInt64(&metrics.ConnectedGauge) == 1 },
|
||||||
return atomic.LoadInt64(&metrics.ConnectedGauge) == 1
|
SummarizerTimeout: cfg.SummarizerTimeout,
|
||||||
},
|
|
||||||
}
|
}
|
||||||
go func() {
|
go func() {
|
||||||
if err := api.Start(ctx); err != nil && err != http.ErrServerClosed {
|
if err := api.Start(ctx); err != nil && err != http.ErrServerClosed {
|
||||||
|
|
|
||||||
7
go.mod
7
go.mod
|
|
@ -5,6 +5,7 @@ go 1.23.0
|
||||||
toolchain go1.24.6
|
toolchain go1.24.6
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
github.com/go-shiori/go-readability v0.0.0-20250217085726-9f5bf5ca7612
|
||||||
github.com/gregdel/pushover v1.3.1
|
github.com/gregdel/pushover v1.3.1
|
||||||
github.com/robfig/cron/v3 v3.0.1
|
github.com/robfig/cron/v3 v3.0.1
|
||||||
github.com/sashabaranov/go-openai v1.41.1
|
github.com/sashabaranov/go-openai v1.41.1
|
||||||
|
|
@ -13,13 +14,19 @@ require (
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
github.com/andybalholm/cascadia v1.3.3 // indirect
|
||||||
|
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de // indirect
|
||||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||||
|
github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect
|
||||||
|
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect
|
||||||
github.com/google/uuid v1.6.0 // indirect
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||||
github.com/ncruces/go-strftime v0.1.9 // indirect
|
github.com/ncruces/go-strftime v0.1.9 // indirect
|
||||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
|
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
|
||||||
|
golang.org/x/net v0.35.0 // indirect
|
||||||
golang.org/x/sys v0.34.0 // indirect
|
golang.org/x/sys v0.34.0 // indirect
|
||||||
|
golang.org/x/text v0.22.0 // indirect
|
||||||
modernc.org/libc v1.66.3 // indirect
|
modernc.org/libc v1.66.3 // indirect
|
||||||
modernc.org/mathutil v1.7.1 // indirect
|
modernc.org/mathutil v1.7.1 // indirect
|
||||||
modernc.org/memory v1.11.0 // indirect
|
modernc.org/memory v1.11.0 // indirect
|
||||||
|
|
|
||||||
96
go.sum
96
go.sum
|
|
@ -1,5 +1,19 @@
|
||||||
|
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
|
||||||
|
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
|
||||||
|
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de h1:FxWPpzIjnTlhPwqqXc4/vE0f7GvRjuAsbW+HOIe8KnA=
|
||||||
|
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de/go.mod h1:DCaWoUhZrYW9p1lxo/cm8EmUOOzAPSEZNGF2DK1dJgw=
|
||||||
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||||
|
github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c h1:wpkoddUomPfHiOziHZixGO5ZBS73cKqVzZipfrLmO1w=
|
||||||
|
github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c/go.mod h1:oVDCh3qjJMLVUSILBRwrm+Bc6RNXGZYtoh9xdvf1ffM=
|
||||||
|
github.com/go-shiori/go-readability v0.0.0-20250217085726-9f5bf5ca7612 h1:BYLNYdZaepitbZreRIa9xeCQZocWmy/wj4cGIH0qyw0=
|
||||||
|
github.com/go-shiori/go-readability v0.0.0-20250217085726-9f5bf5ca7612/go.mod h1:wgqthQa8SAYs0yyljVeCOQlZ027VW5CmLsbi9jWC08c=
|
||||||
|
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f h1:3BSP1Tbs2djlpprl7wCLuiqMaUh5SJkkzI2gDs+FgLs=
|
||||||
|
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14=
|
||||||
|
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||||
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
|
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
|
||||||
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
|
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
|
||||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||||
|
|
@ -8,27 +22,109 @@ github.com/gregdel/pushover v1.3.1 h1:4bMLITOZ15+Zpi6qqoGqOPuVHCwSUvMCgVnN5Xhilf
|
||||||
github.com/gregdel/pushover v1.3.1/go.mod h1:EcaO66Nn1StkpEm1iKtBTV3d2A16SoMsVER1PthX7to=
|
github.com/gregdel/pushover v1.3.1/go.mod h1:EcaO66Nn1StkpEm1iKtBTV3d2A16SoMsVER1PthX7to=
|
||||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||||
|
github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
|
||||||
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
|
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
|
||||||
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
||||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||||
|
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||||
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
|
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
|
||||||
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
|
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
|
||||||
github.com/sashabaranov/go-openai v1.41.1 h1:zf5tM+GuxpyiyD9XZg8nCqu52eYFQg9OOew0gnIuDy4=
|
github.com/sashabaranov/go-openai v1.41.1 h1:zf5tM+GuxpyiyD9XZg8nCqu52eYFQg9OOew0gnIuDy4=
|
||||||
github.com/sashabaranov/go-openai v1.41.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
github.com/sashabaranov/go-openai v1.41.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||||
|
github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4/go.mod h1:C1a7PQSMz9NShzorzCiG2fk9+xuCgLkPeCvMHYR2OWg=
|
||||||
|
github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
|
||||||
|
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
|
||||||
github.com/sorcix/irc v1.1.4 h1:KDmVMPPzK4kbf3TQw1RsZAqTsh2JL9Zw69hYduX9Ykw=
|
github.com/sorcix/irc v1.1.4 h1:KDmVMPPzK4kbf3TQw1RsZAqTsh2JL9Zw69hYduX9Ykw=
|
||||||
github.com/sorcix/irc v1.1.4/go.mod h1:MhzbySH63tDknqfvAAFK3ps/942g4z9EeJ/4lGgHyZc=
|
github.com/sorcix/irc v1.1.4/go.mod h1:MhzbySH63tDknqfvAAFK3ps/942g4z9EeJ/4lGgHyZc=
|
||||||
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
|
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
|
||||||
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||||
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
|
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||||
|
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
|
||||||
|
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
|
||||||
|
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
|
||||||
|
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
||||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
|
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
|
||||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
|
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
|
||||||
|
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||||
|
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||||
|
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||||
|
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||||
|
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||||
golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w=
|
golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w=
|
||||||
golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
|
golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
|
||||||
|
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
|
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||||
|
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||||
|
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||||
|
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
||||||
|
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
|
||||||
|
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
|
||||||
|
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
|
||||||
|
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
|
||||||
|
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
|
||||||
|
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
|
||||||
|
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
|
||||||
|
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
|
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
|
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8=
|
golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8=
|
||||||
golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||||
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
|
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
|
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
|
golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
|
||||||
golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||||
|
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
|
||||||
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
|
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||||
|
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||||
|
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
|
||||||
|
golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
|
||||||
|
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
|
||||||
|
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
|
||||||
|
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
|
||||||
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
|
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||||
|
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||||
|
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||||
|
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||||
|
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||||
|
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||||
|
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
||||||
|
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
|
||||||
|
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
|
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||||
|
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||||
|
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||||
|
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
|
||||||
|
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
|
||||||
golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo=
|
golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo=
|
||||||
golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg=
|
golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg=
|
||||||
|
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
modernc.org/cc/v4 v4.26.2 h1:991HMkLjJzYBIfha6ECZdjrIYz2/1ayr+FL8GN+CNzM=
|
modernc.org/cc/v4 v4.26.2 h1:991HMkLjJzYBIfha6ECZdjrIYz2/1ayr+FL8GN+CNzM=
|
||||||
modernc.org/cc/v4 v4.26.2/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
|
modernc.org/cc/v4 v4.26.2/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
|
||||||
modernc.org/ccgo/v4 v4.28.0 h1:rjznn6WWehKq7dG4JtLRKxb52Ecv8OUGah8+Z/SfpNU=
|
modernc.org/ccgo/v4 v4.28.0 h1:rjznn6WWehKq7dG4JtLRKxb52Ecv8OUGah8+Z/SfpNU=
|
||||||
|
|
|
||||||
|
|
@ -28,16 +28,18 @@ type Config struct {
|
||||||
PushoverAPIToken string
|
PushoverAPIToken string
|
||||||
|
|
||||||
// Summarizer / LLM
|
// Summarizer / LLM
|
||||||
LLMProvider string
|
LLMProvider string
|
||||||
OpenAIAPIKey string
|
OpenAIAPIKey string
|
||||||
OpenAIBaseURL string
|
OpenAIBaseURL string
|
||||||
OpenAIModel string
|
OpenAIModel string
|
||||||
OpenAIMaxTokens int
|
OpenAIMaxTokens int
|
||||||
SummFollowLinks bool
|
SummFollowLinks bool
|
||||||
SummLinkTimeout time.Duration
|
SummLinkTimeout time.Duration
|
||||||
SummLinkMaxBytes int
|
SummLinkMaxBytes int
|
||||||
SummGroupWindow time.Duration
|
SummGroupWindow time.Duration
|
||||||
SummMaxLinks int
|
SummMaxLinks int
|
||||||
|
SummMaxGroups int
|
||||||
|
SummarizerTimeout time.Duration
|
||||||
|
|
||||||
// Digests
|
// Digests
|
||||||
DigestCron string
|
DigestCron string
|
||||||
|
|
@ -90,6 +92,8 @@ func FromEnv() Config {
|
||||||
cfg.SummLinkMaxBytes = getEnvInt("SUMM_LINK_MAX_BYTES", 262144)
|
cfg.SummLinkMaxBytes = getEnvInt("SUMM_LINK_MAX_BYTES", 262144)
|
||||||
cfg.SummGroupWindow = getEnvDuration("SUMM_GROUP_WINDOW", 90*time.Second)
|
cfg.SummGroupWindow = getEnvDuration("SUMM_GROUP_WINDOW", 90*time.Second)
|
||||||
cfg.SummMaxLinks = getEnvInt("SUMM_MAX_LINKS", 5)
|
cfg.SummMaxLinks = getEnvInt("SUMM_MAX_LINKS", 5)
|
||||||
|
cfg.SummMaxGroups = getEnvInt("SUMM_MAX_GROUPS", 0)
|
||||||
|
cfg.SummarizerTimeout = getEnvDuration("SUMM_TIMEOUT", 5*time.Minute)
|
||||||
|
|
||||||
cfg.DigestCron = getEnv("DIGEST_CRON", "0 */6 * * *")
|
cfg.DigestCron = getEnv("DIGEST_CRON", "0 */6 * * *")
|
||||||
cfg.DigestWindow = getEnvDuration("DIGEST_WINDOW", 6*time.Hour)
|
cfg.DigestWindow = getEnvDuration("DIGEST_WINDOW", 6*time.Hour)
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,8 @@ type Server struct {
|
||||||
Logger *slog.Logger
|
Logger *slog.Logger
|
||||||
Metrics *Metrics
|
Metrics *Metrics
|
||||||
Ready func() bool
|
Ready func() bool
|
||||||
|
// Optional timeout override for summarizer
|
||||||
|
SummarizerTimeout time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) Start(ctx context.Context) error {
|
func (s *Server) Start(ctx context.Context) error {
|
||||||
|
|
@ -106,8 +108,12 @@ func (s *Server) handleTrigger(w http.ResponseWriter, r *http.Request) {
|
||||||
_, _ = w.Write([]byte("summarizer not configured"))
|
_, _ = w.Write([]byte("summarizer not configured"))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Timeout summarization to avoid hung requests.
|
// Timeout summarization using configurable timeout (default 5m)
|
||||||
ctxSum, cancel := context.WithTimeout(ctx, 60*time.Second)
|
tout := s.SummarizerTimeout
|
||||||
|
if tout <= 0 {
|
||||||
|
tout = 5 * time.Minute
|
||||||
|
}
|
||||||
|
ctxSum, cancel := context.WithTimeout(ctx, tout)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
summary, err := s.Summarizer.Summarize(ctxSum, channel, msgs, window)
|
summary, err := s.Summarizer.Summarize(ctxSum, channel, msgs, window)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,14 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
readability "github.com/go-shiori/go-readability"
|
||||||
openai "github.com/sashabaranov/go-openai"
|
openai "github.com/sashabaranov/go-openai"
|
||||||
|
|
||||||
"sojuboy/internal/config"
|
"sojuboy/internal/config"
|
||||||
|
|
@ -25,6 +29,7 @@ type OpenAI struct {
|
||||||
linkMaxBytes int
|
linkMaxBytes int
|
||||||
groupWindow time.Duration
|
groupWindow time.Duration
|
||||||
maxLinks int
|
maxLinks int
|
||||||
|
maxGroups int
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewOpenAI(apiKey, baseURL, model string, maxTokens int) *OpenAI {
|
func NewOpenAI(apiKey, baseURL, model string, maxTokens int) *OpenAI {
|
||||||
|
|
@ -40,6 +45,7 @@ func (o *OpenAI) ApplyConfig(cfg config.Config) {
|
||||||
o.linkMaxBytes = cfg.SummLinkMaxBytes
|
o.linkMaxBytes = cfg.SummLinkMaxBytes
|
||||||
o.groupWindow = cfg.SummGroupWindow
|
o.groupWindow = cfg.SummGroupWindow
|
||||||
o.maxLinks = cfg.SummMaxLinks
|
o.maxLinks = cfg.SummMaxLinks
|
||||||
|
o.maxGroups = cfg.SummMaxGroups
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) {
|
func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Message, window time.Duration) (string, error) {
|
||||||
|
|
@ -54,11 +60,25 @@ func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Mes
|
||||||
|
|
||||||
// 1) Group multiline posts from same author within groupWindow
|
// 1) Group multiline posts from same author within groupWindow
|
||||||
grouped := groupMessages(msgs, o.groupWindow)
|
grouped := groupMessages(msgs, o.groupWindow)
|
||||||
|
// Apply group cap if configured (>0). 0 means no cap.
|
||||||
|
if o.maxGroups > 0 && len(grouped) > o.maxGroups {
|
||||||
|
grouped = grouped[len(grouped)-o.maxGroups:]
|
||||||
|
}
|
||||||
|
|
||||||
// 2) Extract links and optionally fetch a small amount of content
|
// 2) Extract links and optionally fetch content
|
||||||
links := extractLinks(grouped)
|
links := extractLinks(grouped)
|
||||||
if o.followLinks && len(links) > 0 {
|
// Split image vs non-image
|
||||||
links = fetchLinkSnippets(ctx, links, o.linkTimeout, o.linkMaxBytes, o.maxLinks)
|
var imageURLs []string
|
||||||
|
var nonImageLinks []linkSnippet
|
||||||
|
for _, l := range links {
|
||||||
|
if isImageURL(l.url) {
|
||||||
|
imageURLs = append(imageURLs, l.url)
|
||||||
|
} else {
|
||||||
|
nonImageLinks = append(nonImageLinks, l)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if o.followLinks && len(nonImageLinks) > 0 {
|
||||||
|
nonImageLinks = fetchLinkSnippets(ctx, nonImageLinks, o.linkTimeout, o.linkMaxBytes, o.maxLinks)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3) Build a concise, natural prompt
|
// 3) Build a concise, natural prompt
|
||||||
|
|
@ -76,9 +96,9 @@ func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Mes
|
||||||
b.WriteString(g.text)
|
b.WriteString(g.text)
|
||||||
b.WriteString("\n")
|
b.WriteString("\n")
|
||||||
}
|
}
|
||||||
if len(links) > 0 {
|
if len(nonImageLinks) > 0 {
|
||||||
b.WriteString("\nReferenced content (snippets):\n")
|
b.WriteString("\nReferenced content (snippets):\n")
|
||||||
for _, ln := range links {
|
for _, ln := range nonImageLinks {
|
||||||
b.WriteString("- ")
|
b.WriteString("- ")
|
||||||
b.WriteString(ln.url)
|
b.WriteString(ln.url)
|
||||||
b.WriteString(" → ")
|
b.WriteString(" → ")
|
||||||
|
|
@ -101,11 +121,25 @@ func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Mes
|
||||||
}
|
}
|
||||||
reasoningLike := strings.HasPrefix(model, "gpt-5") || strings.HasPrefix(model, "o1") || strings.Contains(model, "reasoning")
|
reasoningLike := strings.HasPrefix(model, "gpt-5") || strings.HasPrefix(model, "o1") || strings.Contains(model, "reasoning")
|
||||||
|
|
||||||
|
// Build multimodal user message parts
|
||||||
|
userParts := []openai.ChatMessagePart{{Type: openai.ChatMessagePartTypeText, Text: prompt}}
|
||||||
|
// Limit images to o.maxLinks to avoid overloading
|
||||||
|
maxImgs := o.maxLinks
|
||||||
|
if len(imageURLs) > maxImgs {
|
||||||
|
imageURLs = imageURLs[:maxImgs]
|
||||||
|
}
|
||||||
|
for _, u := range imageURLs {
|
||||||
|
userParts = append(userParts, openai.ChatMessagePart{
|
||||||
|
Type: openai.ChatMessagePartTypeImageURL,
|
||||||
|
ImageURL: &openai.ChatMessageImageURL{URL: u},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
req := openai.ChatCompletionRequest{
|
req := openai.ChatCompletionRequest{
|
||||||
Model: model,
|
Model: model,
|
||||||
Messages: []openai.ChatCompletionMessage{
|
Messages: []openai.ChatCompletionMessage{
|
||||||
{Role: openai.ChatMessageRoleSystem, Content: sys},
|
{Role: openai.ChatMessageRoleSystem, Content: sys},
|
||||||
{Role: openai.ChatMessageRoleUser, Content: prompt},
|
{Role: openai.ChatMessageRoleUser, MultiContent: userParts},
|
||||||
},
|
},
|
||||||
MaxCompletionTokens: o.maxTokens,
|
MaxCompletionTokens: o.maxTokens,
|
||||||
}
|
}
|
||||||
|
|
@ -118,9 +152,31 @@ func (o *OpenAI) Summarize(ctx context.Context, channel string, msgs []store.Mes
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
if len(resp.Choices) == 0 {
|
if len(resp.Choices) == 0 {
|
||||||
return "", nil
|
return localFallbackSummary(grouped, append(nonImageLinks, linksFromImages(imageURLs)...)), nil
|
||||||
}
|
}
|
||||||
return strings.TrimSpace(resp.Choices[0].Message.Content), nil
|
out := strings.TrimSpace(resp.Choices[0].Message.Content)
|
||||||
|
if out == "" {
|
||||||
|
return localFallbackSummary(grouped, append(nonImageLinks, linksFromImages(imageURLs)...)), nil
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func linksFromImages(imgs []string) []linkSnippet {
|
||||||
|
out := make([]linkSnippet, 0, len(imgs))
|
||||||
|
for _, u := range imgs {
|
||||||
|
out = append(out, linkSnippet{url: u})
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// isImageURL reports whether u looks like a direct link to an image, judged by
// the file extension at the end of its path (.jpg, .jpeg, .png, .gif or .webp,
// compared case-insensitively).
//
// Any query string or fragment is ignored before the extension is checked, so
// links such as "https://host/pic.png?width=640" are recognized. The check is
// purely lexical: no request is made and the content type is not verified.
func isImageURL(u string) bool {
	// The extension belongs to the path; drop everything from the first '?' or '#'.
	if i := strings.IndexAny(u, "?#"); i >= 0 {
		u = u[:i]
	}
	lu := strings.ToLower(u)
	for _, ext := range []string{".jpg", ".jpeg", ".png", ".gif", ".webp"} {
		if strings.HasSuffix(lu, ext) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
type linkSnippet struct {
|
type linkSnippet struct {
|
||||||
|
|
@ -162,7 +218,16 @@ func extractLinks(msgs []groupedMsg) []linkSnippet {
|
||||||
links = append(links, linkSnippet{url: m})
|
links = append(links, linkSnippet{url: m})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return links
|
// de-dup
|
||||||
|
saw := make(map[string]bool)
|
||||||
|
dedup := make([]linkSnippet, 0, len(links))
|
||||||
|
for _, l := range links {
|
||||||
|
if !saw[l.url] {
|
||||||
|
saw[l.url] = true
|
||||||
|
dedup = append(dedup, l)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return dedup
|
||||||
}
|
}
|
||||||
|
|
||||||
func fetchLinkSnippets(ctx context.Context, links []linkSnippet, timeout time.Duration, maxBytes int, maxLinks int) []linkSnippet {
|
func fetchLinkSnippets(ctx context.Context, links []linkSnippet, timeout time.Duration, maxBytes int, maxLinks int) []linkSnippet {
|
||||||
|
|
@ -190,15 +255,76 @@ func fetchLinkSnippets(ctx context.Context, links []linkSnippet, timeout time.Du
|
||||||
if err != nil || len(b) == 0 {
|
if err != nil || len(b) == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// naive text cleanup
|
|
||||||
text := string(b)
|
text := string(b)
|
||||||
|
// Try readability for cleaner article text
|
||||||
|
if baseURL, perr := url.Parse(ln.url); perr == nil {
|
||||||
|
if art, err := readability.FromReader(strings.NewReader(text), baseURL); err == nil {
|
||||||
|
if at := strings.TrimSpace(art.TextContent); at != "" {
|
||||||
|
text = at
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
text = strings.ReplaceAll(text, "\r", "")
|
text = strings.ReplaceAll(text, "\r", "")
|
||||||
text = strings.TrimSpace(text)
|
text = strings.TrimSpace(text)
|
||||||
if len(text) > 800 {
|
if len(text) > 2000 {
|
||||||
text = text[:800]
|
text = text[:2000]
|
||||||
}
|
}
|
||||||
out = append(out, linkSnippet{url: ln.url, snippet: text})
|
out = append(out, linkSnippet{url: ln.url, snippet: text})
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func localFallbackSummary(grouped []groupedMsg, links []linkSnippet) string {
|
||||||
|
if len(grouped) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
// simple counts
|
||||||
|
authors := map[string]int{}
|
||||||
|
for _, g := range grouped {
|
||||||
|
authors[g.author]++
|
||||||
|
}
|
||||||
|
authorList := make([]string, 0, len(authors))
|
||||||
|
for a := range authors {
|
||||||
|
authorList = append(authorList, a)
|
||||||
|
}
|
||||||
|
sort.Strings(authorList)
|
||||||
|
var b strings.Builder
|
||||||
|
b.WriteString("Summary (fallback)\n")
|
||||||
|
b.WriteString("- Messages: ")
|
||||||
|
b.WriteString(strconvI(len(grouped)))
|
||||||
|
b.WriteString(" groups by ")
|
||||||
|
b.WriteString(strconvI(len(authors)))
|
||||||
|
b.WriteString(" authors\n")
|
||||||
|
if len(links) > 0 {
|
||||||
|
b.WriteString("- Links: ")
|
||||||
|
for i, l := range links {
|
||||||
|
if i > 0 {
|
||||||
|
b.WriteString(", ")
|
||||||
|
}
|
||||||
|
b.WriteString(l.url)
|
||||||
|
}
|
||||||
|
b.WriteString("\n")
|
||||||
|
}
|
||||||
|
// include last few grouped lines as a teaser
|
||||||
|
tail := grouped
|
||||||
|
if len(tail) > 5 {
|
||||||
|
tail = tail[len(tail)-5:]
|
||||||
|
}
|
||||||
|
for _, g := range tail {
|
||||||
|
b.WriteString("• ")
|
||||||
|
b.WriteString(g.author)
|
||||||
|
b.WriteString(": ")
|
||||||
|
line := g.text
|
||||||
|
if len(line) > 200 {
|
||||||
|
line = line[:200] + "…"
|
||||||
|
}
|
||||||
|
b.WriteString(line)
|
||||||
|
b.WriteString("\n")
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(b.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// strconvI returns the base-10 string form of n.
func strconvI(n int) string {
	return strconv.FormatInt(int64(n), 10)
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue