feat(cards): improve OG/Twitter parsing (more keys, absolute image URLs, larger read, UA headers); load Twitter widgets.js to render media
This commit is contained in:
parent
15f7f3ac96
commit
962469bd76
2 changed files with 69 additions and 38 deletions
|
|
@ -273,7 +273,9 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
|
|||
// fetch minimal HTML and extract tags using a tolerant HTML parser
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
req, _ := http.NewRequestWithContext(r.Context(), http.MethodGet, raw, nil)
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; sojuboy/1.0)")
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36")
|
||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8")
|
||||
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
w.WriteHeader(http.StatusBadGateway)
|
||||
|
|
@ -296,8 +298,8 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
|
|||
return
|
||||
}
|
||||
|
||||
// limit to 256KB and parse tokens
|
||||
limited := http.MaxBytesReader(w, resp.Body, 262144)
|
||||
// limit to 768KB and parse tokens
|
||||
limited := http.MaxBytesReader(w, resp.Body, 786432)
|
||||
doc, err := xhtml.Parse(limited)
|
||||
if err != nil {
|
||||
w.WriteHeader(http.StatusBadGateway)
|
||||
|
|
@ -307,36 +309,38 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
|
|||
var title, desc, img string
|
||||
var walker func(*xhtml.Node)
|
||||
walker = func(n *xhtml.Node) {
|
||||
if n.Type == xhtml.ElementNode && strings.EqualFold(n.Data, "meta") {
|
||||
// property or name + content
|
||||
var pn = ""
|
||||
var nm = ""
|
||||
var content = ""
|
||||
for _, a := range n.Attr {
|
||||
if strings.EqualFold(a.Key, "property") {
|
||||
pn = a.Val
|
||||
} else if strings.EqualFold(a.Key, "name") {
|
||||
nm = a.Val
|
||||
} else if strings.EqualFold(a.Key, "content") {
|
||||
content = a.Val
|
||||
if n.Type == xhtml.ElementNode {
|
||||
if strings.EqualFold(n.Data, "meta") {
|
||||
// property or name + content
|
||||
var pn = ""
|
||||
var nm = ""
|
||||
var content = ""
|
||||
for _, a := range n.Attr {
|
||||
if strings.EqualFold(a.Key, "property") {
|
||||
pn = a.Val
|
||||
} else if strings.EqualFold(a.Key, "name") {
|
||||
nm = a.Val
|
||||
} else if strings.EqualFold(a.Key, "content") {
|
||||
content = a.Val
|
||||
}
|
||||
}
|
||||
}
|
||||
key := strings.ToLower(pn)
|
||||
if key == "" {
|
||||
key = strings.ToLower(nm)
|
||||
}
|
||||
switch key {
|
||||
case "og:title", "twitter:title":
|
||||
if title == "" {
|
||||
title = content
|
||||
key := strings.ToLower(pn)
|
||||
if key == "" {
|
||||
key = strings.ToLower(nm)
|
||||
}
|
||||
case "og:description", "twitter:description":
|
||||
if desc == "" {
|
||||
desc = content
|
||||
}
|
||||
case "og:image", "twitter:image":
|
||||
if img == "" {
|
||||
img = content
|
||||
switch key {
|
||||
case "og:title", "twitter:title":
|
||||
if title == "" {
|
||||
title = content
|
||||
}
|
||||
case "og:description", "twitter:description", "description":
|
||||
if desc == "" {
|
||||
desc = content
|
||||
}
|
||||
case "og:image", "og:image:url", "og:image:secure_url", "twitter:image", "twitter:image:src":
|
||||
if img == "" {
|
||||
img = content
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -345,6 +349,21 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
|
|||
}
|
||||
}
|
||||
walker(doc)
|
||||
|
||||
// normalize image URL
|
||||
if img != "" {
|
||||
if strings.HasPrefix(img, "//") {
|
||||
if u.Scheme == "" {
|
||||
u.Scheme = "https"
|
||||
}
|
||||
img = u.Scheme + ":" + img
|
||||
} else if !strings.HasPrefix(img, "http://") && !strings.HasPrefix(img, "https://") {
|
||||
if ref, err := url.Parse(img); err == nil {
|
||||
img = u.ResolveReference(ref).String()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
card := linkCard{URL: raw, Title: strings.TrimSpace(title), Description: strings.TrimSpace(desc), Image: strings.TrimSpace(img)}
|
||||
// cache for 24h
|
||||
s.cardCache[raw] = card
|
||||
|
|
@ -371,8 +390,12 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
|
|||
_, _ = w.Write([]byte("summarizer not configured"))
|
||||
return
|
||||
}
|
||||
if s.summaryCache == nil { s.summaryCache = make(map[string]string) }
|
||||
if s.summaryCacheExp == nil { s.summaryCacheExp = make(map[string]time.Time) }
|
||||
if s.summaryCache == nil {
|
||||
s.summaryCache = make(map[string]string)
|
||||
}
|
||||
if s.summaryCacheExp == nil {
|
||||
s.summaryCacheExp = make(map[string]time.Time)
|
||||
}
|
||||
if exp, ok := s.summaryCacheExp[raw]; ok && time.Now().Before(exp) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{"summary": s.summaryCache[raw]})
|
||||
|
|
@ -380,8 +403,12 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
|
|||
}
|
||||
msgs := []store.Message{{Channel: "#links", Author: "link", Body: raw, Time: time.Now().UTC()}}
|
||||
tout := s.SummarizerTimeout
|
||||
if tout <= 0 { tout = 5 * time.Minute }
|
||||
if tout > 2*time.Minute { tout = 2 * time.Minute }
|
||||
if tout <= 0 {
|
||||
tout = 5 * time.Minute
|
||||
}
|
||||
if tout > 2*time.Minute {
|
||||
tout = 2 * time.Minute
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(r.Context(), tout)
|
||||
defer cancel()
|
||||
sum, err := s.Summarizer.Summarize(ctx, "#links", msgs, 0)
|
||||
|
|
@ -390,7 +417,9 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
|
|||
_, _ = w.Write([]byte("summarizer error"))
|
||||
return
|
||||
}
|
||||
if sum == "" { sum = "(no summary)" }
|
||||
if sum == "" {
|
||||
sum = "(no summary)"
|
||||
}
|
||||
s.summaryCache[raw] = sum
|
||||
s.summaryCacheExp[raw] = time.Now().Add(24 * time.Hour)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
// Shared state
|
||||
const st = { tailLoading: false, atBottom: true, current: '#', earliest: null, sse: null, channels: [] };
|
||||
const st = { tailLoading: false, atBottom: true, current: '#', earliest: null, sse: null, channels: [], twLoaded: false };
|
||||
|
||||
function measureBars(){
|
||||
const hdr = document.querySelector('header.nav');
|
||||
|
|
@ -26,6 +26,8 @@ async function api(path, params){
|
|||
return res.text();
|
||||
}
|
||||
|
||||
// Make sure Twitter's widgets.js is available so embedded tweet markup
// inserted into the page gets rendered.
//
// First call: injects the async <script> tag exactly once (guarded by
// st.twLoaded). Later calls: if the script has finished loading and exposes
// twttr.widgets.load, ask it to re-scan the page so embeds added after the
// initial load are rendered too. If the script is still in flight, nothing
// needs to be done here; per Twitter's docs it scans the document itself once
// it finishes loading.
function ensureTwitterWidgets(){
  if(st.twLoaded){
    const tw = window.twttr;
    if(tw && tw.widgets && typeof tw.widgets.load === 'function'){ tw.widgets.load(); }
    return;
  }
  st.twLoaded = true;
  const s=document.createElement('script');
  s.async=true;
  s.src='https://platform.twitter.com/widgets.js';
  document.head.appendChild(s);
}
|
||||
|
||||
// Render each message in `arr` as a div.msg row and append the whole batch to
// the #tail container in a single DOM insertion, then re-pin the view via
// pinBottomMulti().
function appendBatch(arr){
  const tail = document.getElementById('tail');
  const batch = document.createDocumentFragment();
  arr.forEach(msg => {
    const row = document.createElement('div');
    row.className = 'msg';
    row.innerHTML = lineHTML(msg);
    batch.appendChild(row);
    // Links are processed after the row has joined the fragment.
    processLinks(row);
  });
  tail.appendChild(batch);
  pinBottomMulti();
}
|
||||
// Render each message in `arr` as a div.msg row and insert the batch at the
// top of the #tail container. The previous first child is used as an anchor:
// its viewport offset is measured before and after the insertion and the
// window is scrolled by the difference so the visible content does not jump.
function prependBatch(arr){
  const tail = document.getElementById('tail');
  const anchor = tail.firstChild;
  const topBefore = anchor ? anchor.getBoundingClientRect().top : 0;
  const batch = document.createDocumentFragment();
  arr.forEach(msg => {
    const row = document.createElement('div');
    row.className = 'msg';
    row.innerHTML = lineHTML(msg);
    batch.appendChild(row);
    processLinks(row);
  });
  tail.insertBefore(batch, tail.firstChild);
  // Nothing to compensate for when the container was empty.
  if(!anchor) return;
  const topAfter = anchor.getBoundingClientRect().top;
  window.scrollBy(0, topAfter - topBefore);
}
|
||||
|
||||
|
|
@ -43,7 +45,7 @@ function processLinks(scope){ const links = scope.querySelectorAll('a[href]:not(
|
|||
const row = document.createElement('div'); row.style.display='flex'; row.style.alignItems='flex-start'; row.style.gap='.5rem'; row.innerHTML = html;
|
||||
c.appendChild(row);
|
||||
c.querySelectorAll('img').forEach(img=> img.addEventListener('load', ()=> pinBottomMulti()));
|
||||
if(card.html){ const wrap=document.createElement('div'); wrap.innerHTML=card.html; c.appendChild(wrap); }
|
||||
if(card.html){ const wrap=document.createElement('div'); wrap.innerHTML=card.html; c.appendChild(wrap); ensureTwitterWidgets(); }
|
||||
// Summary control row
|
||||
const ctrl = document.createElement('div'); ctrl.style.marginTop='.25rem';
|
||||
const btn = document.createElement('button'); btn.type='button'; btn.title='Summarize this link'; btn.textContent='\u25B6'; btn.style.padding='0 .4rem'; btn.style.fontSize='.9rem';
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue