feat(cards): improve OG/Twitter parsing (more keys, absolute image URLs, larger read, UA headers); load Twitter widgets.js to render media

This commit is contained in:
Thomas Cravey 2025-08-17 16:14:24 -05:00
parent 15f7f3ac96
commit 962469bd76
2 changed files with 69 additions and 38 deletions

View file

@ -273,7 +273,9 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
// fetch minimal HTML and extract tags using a tolerant HTML parser
client := &http.Client{Timeout: 10 * time.Second}
req, _ := http.NewRequestWithContext(r.Context(), http.MethodGet, raw, nil)
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; sojuboy/1.0)")
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8")
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
resp, err := client.Do(req)
if err != nil {
w.WriteHeader(http.StatusBadGateway)
@ -296,8 +298,8 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
return
}
// limit to 256KB and parse tokens
limited := http.MaxBytesReader(w, resp.Body, 262144)
// limit to 768KB and parse tokens
limited := http.MaxBytesReader(w, resp.Body, 786432)
doc, err := xhtml.Parse(limited)
if err != nil {
w.WriteHeader(http.StatusBadGateway)
@ -307,7 +309,8 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
var title, desc, img string
var walker func(*xhtml.Node)
walker = func(n *xhtml.Node) {
if n.Type == xhtml.ElementNode && strings.EqualFold(n.Data, "meta") {
if n.Type == xhtml.ElementNode {
if strings.EqualFold(n.Data, "meta") {
// property or name + content
var pn = ""
var nm = ""
@ -330,21 +333,37 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
if title == "" {
title = content
}
case "og:description", "twitter:description":
case "og:description", "twitter:description", "description":
if desc == "" {
desc = content
}
case "og:image", "twitter:image":
case "og:image", "og:image:url", "og:image:secure_url", "twitter:image", "twitter:image:src":
if img == "" {
img = content
}
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
walker(c)
}
}
walker(doc)
// normalize image URL
if img != "" {
if strings.HasPrefix(img, "//") {
if u.Scheme == "" {
u.Scheme = "https"
}
img = u.Scheme + ":" + img
} else if !strings.HasPrefix(img, "http://") && !strings.HasPrefix(img, "https://") {
if ref, err := url.Parse(img); err == nil {
img = u.ResolveReference(ref).String()
}
}
}
card := linkCard{URL: raw, Title: strings.TrimSpace(title), Description: strings.TrimSpace(desc), Image: strings.TrimSpace(img)}
// cache for 24h
s.cardCache[raw] = card
@ -371,8 +390,12 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte("summarizer not configured"))
return
}
if s.summaryCache == nil { s.summaryCache = make(map[string]string) }
if s.summaryCacheExp == nil { s.summaryCacheExp = make(map[string]time.Time) }
if s.summaryCache == nil {
s.summaryCache = make(map[string]string)
}
if s.summaryCacheExp == nil {
s.summaryCacheExp = make(map[string]time.Time)
}
if exp, ok := s.summaryCacheExp[raw]; ok && time.Now().Before(exp) {
w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(map[string]any{"summary": s.summaryCache[raw]})
@ -380,8 +403,12 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
}
msgs := []store.Message{{Channel: "#links", Author: "link", Body: raw, Time: time.Now().UTC()}}
tout := s.SummarizerTimeout
if tout <= 0 { tout = 5 * time.Minute }
if tout > 2*time.Minute { tout = 2 * time.Minute }
if tout <= 0 {
tout = 5 * time.Minute
}
if tout > 2*time.Minute {
tout = 2 * time.Minute
}
ctx, cancel := context.WithTimeout(r.Context(), tout)
defer cancel()
sum, err := s.Summarizer.Summarize(ctx, "#links", msgs, 0)
@ -390,7 +417,9 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte("summarizer error"))
return
}
if sum == "" { sum = "(no summary)" }
if sum == "" {
sum = "(no summary)"
}
s.summaryCache[raw] = sum
s.summaryCacheExp[raw] = time.Now().Add(24 * time.Hour)
w.Header().Set("Content-Type", "application/json")

View file

@ -1,5 +1,5 @@
// Shared state
const st = { tailLoading: false, atBottom: true, current: '#', earliest: null, sse: null, channels: [] };
const st = { tailLoading: false, atBottom: true, current: '#', earliest: null, sse: null, channels: [], twLoaded: false };
function measureBars(){
const hdr = document.querySelector('header.nav');
@ -26,6 +26,8 @@ async function api(path, params){
return res.text();
}
/**
 * Make sure Twitter's widgets.js is available so embed markup can be rendered.
 *
 * First call: injects the <script> tag exactly once (guarded by st.twLoaded).
 * Later calls: the script tag is already present, so instead of doing nothing,
 * ask the loaded library to re-scan the page via twttr.widgets.load(). Without
 * this, embed markup inserted after widgets.js has already run stays unrendered.
 * If the script was injected but has not finished loading yet, window.twttr is
 * not usable and the call is skipped; the script's own start-up scan is relied
 * on for markup that is already in the document by then.
 */
function ensureTwitterWidgets(){
  if(st.twLoaded){
    const tw = window.twttr;
    // Only call into the library once it has actually initialised.
    if(tw && tw.widgets && typeof tw.widgets.load === 'function'){
      tw.widgets.load();
    }
    return;
  }
  st.twLoaded = true;
  const s = document.createElement('script');
  s.async = true;
  s.src = 'https://platform.twitter.com/widgets.js';
  document.head.appendChild(s);
}
/**
 * Build one `.msg` row per entry of `arr` and add them all to the end of the
 * `#tail` element in a single DOM insertion, then call pinBottomMulti().
 */
function appendBatch(arr){
  const tail = document.getElementById('tail');
  const staging = document.createDocumentFragment();
  arr.forEach(function(msg){
    const row = document.createElement('div');
    row.className = 'msg';
    row.innerHTML = lineHTML(msg);
    staging.appendChild(row);
    // processLinks runs while the row is still inside the fragment.
    processLinks(row);
  });
  tail.appendChild(staging);
  pinBottomMulti();
}
/**
 * Build one `.msg` row per entry of `arr` and insert them at the start of the
 * `#tail` element, then scroll the window by however far the previously-first
 * child moved so that it stays at the same place on screen.
 */
function prependBatch(arr){
  const tail = document.getElementById('tail');
  // Remember the node that is currently first, and where it sits, before inserting.
  const anchor = tail.firstChild;
  let topBefore = 0;
  if(anchor){
    topBefore = anchor.getBoundingClientRect().top;
  }
  const staging = document.createDocumentFragment();
  arr.forEach(function(msg){
    const row = document.createElement('div');
    row.className = 'msg';
    row.innerHTML = lineHTML(msg);
    staging.appendChild(row);
    // processLinks runs while the row is still inside the fragment.
    processLinks(row);
  });
  tail.insertBefore(staging, tail.firstChild);
  if(!anchor){
    return;
  }
  // Compensate for the height of the newly inserted rows.
  const topAfter = anchor.getBoundingClientRect().top;
  window.scrollBy(0, topAfter - topBefore);
}
@ -43,7 +45,7 @@ function processLinks(scope){ const links = scope.querySelectorAll('a[href]:not(
const row = document.createElement('div'); row.style.display='flex'; row.style.alignItems='flex-start'; row.style.gap='.5rem'; row.innerHTML = html;
c.appendChild(row);
c.querySelectorAll('img').forEach(img=> img.addEventListener('load', ()=> pinBottomMulti()));
if(card.html){ const wrap=document.createElement('div'); wrap.innerHTML=card.html; c.appendChild(wrap); }
if(card.html){ const wrap=document.createElement('div'); wrap.innerHTML=card.html; c.appendChild(wrap); ensureTwitterWidgets(); }
// Summary control row
const ctrl = document.createElement('div'); ctrl.style.marginTop='.25rem';
const btn = document.createElement('button'); btn.type='button'; btn.title='Summarize this link'; btn.textContent='\u25B6'; btn.style.padding='0 .4rem'; btn.style.fontSize='.9rem';