feat(cards): improve OG/Twitter parsing (more keys, absolute image URLs, larger read, UA headers); load Twitter widgets.js to render media
This commit is contained in:
parent
15f7f3ac96
commit
962469bd76
2 changed files with 69 additions and 38 deletions
|
|
@ -273,7 +273,9 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
|
||||||
// fetch minimal HTML and extract tags using a tolerant HTML parser
|
// fetch minimal HTML and extract tags using a tolerant HTML parser
|
||||||
client := &http.Client{Timeout: 10 * time.Second}
|
client := &http.Client{Timeout: 10 * time.Second}
|
||||||
req, _ := http.NewRequestWithContext(r.Context(), http.MethodGet, raw, nil)
|
req, _ := http.NewRequestWithContext(r.Context(), http.MethodGet, raw, nil)
|
||||||
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; sojuboy/1.0)")
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36")
|
||||||
|
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8")
|
||||||
|
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||||
resp, err := client.Do(req)
|
resp, err := client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
w.WriteHeader(http.StatusBadGateway)
|
w.WriteHeader(http.StatusBadGateway)
|
||||||
|
|
@ -296,8 +298,8 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// limit to 256KB and parse tokens
|
// limit to 768KB and parse tokens
|
||||||
limited := http.MaxBytesReader(w, resp.Body, 262144)
|
limited := http.MaxBytesReader(w, resp.Body, 786432)
|
||||||
doc, err := xhtml.Parse(limited)
|
doc, err := xhtml.Parse(limited)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
w.WriteHeader(http.StatusBadGateway)
|
w.WriteHeader(http.StatusBadGateway)
|
||||||
|
|
@ -307,36 +309,38 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
|
||||||
var title, desc, img string
|
var title, desc, img string
|
||||||
var walker func(*xhtml.Node)
|
var walker func(*xhtml.Node)
|
||||||
walker = func(n *xhtml.Node) {
|
walker = func(n *xhtml.Node) {
|
||||||
if n.Type == xhtml.ElementNode && strings.EqualFold(n.Data, "meta") {
|
if n.Type == xhtml.ElementNode {
|
||||||
// property or name + content
|
if strings.EqualFold(n.Data, "meta") {
|
||||||
var pn = ""
|
// property or name + content
|
||||||
var nm = ""
|
var pn = ""
|
||||||
var content = ""
|
var nm = ""
|
||||||
for _, a := range n.Attr {
|
var content = ""
|
||||||
if strings.EqualFold(a.Key, "property") {
|
for _, a := range n.Attr {
|
||||||
pn = a.Val
|
if strings.EqualFold(a.Key, "property") {
|
||||||
} else if strings.EqualFold(a.Key, "name") {
|
pn = a.Val
|
||||||
nm = a.Val
|
} else if strings.EqualFold(a.Key, "name") {
|
||||||
} else if strings.EqualFold(a.Key, "content") {
|
nm = a.Val
|
||||||
content = a.Val
|
} else if strings.EqualFold(a.Key, "content") {
|
||||||
|
content = a.Val
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
key := strings.ToLower(pn)
|
||||||
key := strings.ToLower(pn)
|
if key == "" {
|
||||||
if key == "" {
|
key = strings.ToLower(nm)
|
||||||
key = strings.ToLower(nm)
|
|
||||||
}
|
|
||||||
switch key {
|
|
||||||
case "og:title", "twitter:title":
|
|
||||||
if title == "" {
|
|
||||||
title = content
|
|
||||||
}
|
}
|
||||||
case "og:description", "twitter:description":
|
switch key {
|
||||||
if desc == "" {
|
case "og:title", "twitter:title":
|
||||||
desc = content
|
if title == "" {
|
||||||
}
|
title = content
|
||||||
case "og:image", "twitter:image":
|
}
|
||||||
if img == "" {
|
case "og:description", "twitter:description", "description":
|
||||||
img = content
|
if desc == "" {
|
||||||
|
desc = content
|
||||||
|
}
|
||||||
|
case "og:image", "og:image:url", "og:image:secure_url", "twitter:image", "twitter:image:src":
|
||||||
|
if img == "" {
|
||||||
|
img = content
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -345,6 +349,21 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
walker(doc)
|
walker(doc)
|
||||||
|
|
||||||
|
// normalize image URL
|
||||||
|
if img != "" {
|
||||||
|
if strings.HasPrefix(img, "//") {
|
||||||
|
if u.Scheme == "" {
|
||||||
|
u.Scheme = "https"
|
||||||
|
}
|
||||||
|
img = u.Scheme + ":" + img
|
||||||
|
} else if !strings.HasPrefix(img, "http://") && !strings.HasPrefix(img, "https://") {
|
||||||
|
if ref, err := url.Parse(img); err == nil {
|
||||||
|
img = u.ResolveReference(ref).String()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
card := linkCard{URL: raw, Title: strings.TrimSpace(title), Description: strings.TrimSpace(desc), Image: strings.TrimSpace(img)}
|
card := linkCard{URL: raw, Title: strings.TrimSpace(title), Description: strings.TrimSpace(desc), Image: strings.TrimSpace(img)}
|
||||||
// cache for 24h
|
// cache for 24h
|
||||||
s.cardCache[raw] = card
|
s.cardCache[raw] = card
|
||||||
|
|
@ -371,8 +390,12 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
|
||||||
_, _ = w.Write([]byte("summarizer not configured"))
|
_, _ = w.Write([]byte("summarizer not configured"))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if s.summaryCache == nil { s.summaryCache = make(map[string]string) }
|
if s.summaryCache == nil {
|
||||||
if s.summaryCacheExp == nil { s.summaryCacheExp = make(map[string]time.Time) }
|
s.summaryCache = make(map[string]string)
|
||||||
|
}
|
||||||
|
if s.summaryCacheExp == nil {
|
||||||
|
s.summaryCacheExp = make(map[string]time.Time)
|
||||||
|
}
|
||||||
if exp, ok := s.summaryCacheExp[raw]; ok && time.Now().Before(exp) {
|
if exp, ok := s.summaryCacheExp[raw]; ok && time.Now().Before(exp) {
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
_ = json.NewEncoder(w).Encode(map[string]any{"summary": s.summaryCache[raw]})
|
_ = json.NewEncoder(w).Encode(map[string]any{"summary": s.summaryCache[raw]})
|
||||||
|
|
@ -380,8 +403,12 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
|
||||||
}
|
}
|
||||||
msgs := []store.Message{{Channel: "#links", Author: "link", Body: raw, Time: time.Now().UTC()}}
|
msgs := []store.Message{{Channel: "#links", Author: "link", Body: raw, Time: time.Now().UTC()}}
|
||||||
tout := s.SummarizerTimeout
|
tout := s.SummarizerTimeout
|
||||||
if tout <= 0 { tout = 5 * time.Minute }
|
if tout <= 0 {
|
||||||
if tout > 2*time.Minute { tout = 2 * time.Minute }
|
tout = 5 * time.Minute
|
||||||
|
}
|
||||||
|
if tout > 2*time.Minute {
|
||||||
|
tout = 2 * time.Minute
|
||||||
|
}
|
||||||
ctx, cancel := context.WithTimeout(r.Context(), tout)
|
ctx, cancel := context.WithTimeout(r.Context(), tout)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
sum, err := s.Summarizer.Summarize(ctx, "#links", msgs, 0)
|
sum, err := s.Summarizer.Summarize(ctx, "#links", msgs, 0)
|
||||||
|
|
@ -390,7 +417,9 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
|
||||||
_, _ = w.Write([]byte("summarizer error"))
|
_, _ = w.Write([]byte("summarizer error"))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if sum == "" { sum = "(no summary)" }
|
if sum == "" {
|
||||||
|
sum = "(no summary)"
|
||||||
|
}
|
||||||
s.summaryCache[raw] = sum
|
s.summaryCache[raw] = sum
|
||||||
s.summaryCacheExp[raw] = time.Now().Add(24 * time.Hour)
|
s.summaryCacheExp[raw] = time.Now().Add(24 * time.Hour)
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
// Shared state
|
// Shared state
|
||||||
const st = { tailLoading: false, atBottom: true, current: '#', earliest: null, sse: null, channels: [] };
|
// twLoaded is flipped to true by ensureTwitterWidgets() the first time it injects the widgets.js <script> tag, so the tag is only ever added once.
const st = { tailLoading: false, atBottom: true, current: '#', earliest: null, sse: null, channels: [], twLoaded: false };
|
||||||
|
|
||||||
function measureBars(){
|
function measureBars(){
|
||||||
const hdr = document.querySelector('header.nav');
|
const hdr = document.querySelector('header.nav');
|
||||||
|
|
@ -26,6 +26,8 @@ async function api(path, params){
|
||||||
return res.text();
|
return res.text();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Makes sure Twitter's widgets.js is available and has processed the current DOM.
// Called after embed markup (card.html) has been inserted into the page.
//
// First call: injects the async <script> tag once; st.twLoaded guards against
// adding it a second time. When the script finishes loading it scans the whole
// document by itself, so anything inserted up to that point gets rendered.
//
// Later calls: widgets.js does not watch for DOM changes, so markup inserted
// after it has loaded would stay unrendered. Ask the library to re-scan via
// twttr.widgets.load(). If the script is still in flight (window.twttr not
// defined yet) this is a no-op and the initial scan will cover the new markup.
function ensureTwitterWidgets(){
  if(st.twLoaded){
    const tw = window.twttr;
    if(tw && tw.widgets && typeof tw.widgets.load === 'function'){ tw.widgets.load(); }
    return;
  }
  st.twLoaded = true;
  const s=document.createElement('script');
  s.async=true;
  s.src='https://platform.twitter.com/widgets.js';
  document.head.appendChild(s);
}
|
||||||
|
|
||||||
// Renders each message in arr as a div.msg and appends the whole batch to the
// end of #tail in a single DOM insertion, then re-pins the view to the bottom.
function appendBatch(arr){
  const tail = document.getElementById('tail');
  const batch = document.createDocumentFragment();
  const addRow = (msg) => {
    const row = document.createElement('div');
    row.className = 'msg';
    row.innerHTML = lineHTML(msg);
    batch.appendChild(row);
    // link processing runs on the row while it still lives in the fragment
    processLinks(row);
  };
  arr.forEach((msg) => { addRow(msg); });
  tail.appendChild(batch);
  pinBottomMulti();
}
|
// Renders each message in arr as a div.msg and appends the whole batch to the
// end of #tail in a single DOM insertion, then re-pins the view to the bottom.
function appendBatch(arr){
  const tail = document.getElementById('tail');
  const batch = document.createDocumentFragment();
  const addRow = (msg) => {
    const row = document.createElement('div');
    row.className = 'msg';
    row.innerHTML = lineHTML(msg);
    batch.appendChild(row);
    // link processing runs on the row while it still lives in the fragment
    processLinks(row);
  };
  arr.forEach((msg) => { addRow(msg); });
  tail.appendChild(batch);
  pinBottomMulti();
}
|
||||||
// Renders each message in arr as a div.msg and inserts the batch at the top of
// #tail. If #tail already had content, the window is scrolled by however far
// the previous first child moved, so the reader's viewport stays put.
function prependBatch(arr){
  const tail = document.getElementById('tail');
  const anchor = tail.firstChild;
  // measure the old first child before anything is inserted above it
  let topBefore = 0;
  if(anchor){ topBefore = anchor.getBoundingClientRect().top; }
  const batch = document.createDocumentFragment();
  arr.forEach((msg) => {
    const row = document.createElement('div');
    row.className = 'msg';
    row.innerHTML = lineHTML(msg);
    batch.appendChild(row);
    processLinks(row);
  });
  tail.insertBefore(batch, tail.firstChild);
  if(!anchor){ return; }
  // compensate for the height that was just added above the anchor
  const topAfter = anchor.getBoundingClientRect().top;
  window.scrollBy(0, topAfter - topBefore);
}
|
// Renders each message in arr as a div.msg and inserts the batch at the top of
// #tail. If #tail already had content, the window is scrolled by however far
// the previous first child moved, so the reader's viewport stays put.
function prependBatch(arr){
  const tail = document.getElementById('tail');
  const anchor = tail.firstChild;
  // measure the old first child before anything is inserted above it
  let topBefore = 0;
  if(anchor){ topBefore = anchor.getBoundingClientRect().top; }
  const batch = document.createDocumentFragment();
  arr.forEach((msg) => {
    const row = document.createElement('div');
    row.className = 'msg';
    row.innerHTML = lineHTML(msg);
    batch.appendChild(row);
    processLinks(row);
  });
  tail.insertBefore(batch, tail.firstChild);
  if(!anchor){ return; }
  // compensate for the height that was just added above the anchor
  const topAfter = anchor.getBoundingClientRect().top;
  window.scrollBy(0, topAfter - topBefore);
}
|
||||||
|
|
||||||
|
|
@ -43,7 +45,7 @@ function processLinks(scope){ const links = scope.querySelectorAll('a[href]:not(
|
||||||
const row = document.createElement('div'); row.style.display='flex'; row.style.alignItems='flex-start'; row.style.gap='.5rem'; row.innerHTML = html;
|
const row = document.createElement('div'); row.style.display='flex'; row.style.alignItems='flex-start'; row.style.gap='.5rem'; row.innerHTML = html;
|
||||||
c.appendChild(row);
|
c.appendChild(row);
|
||||||
c.querySelectorAll('img').forEach(img=> img.addEventListener('load', ()=> pinBottomMulti()));
|
c.querySelectorAll('img').forEach(img=> img.addEventListener('load', ()=> pinBottomMulti()));
|
||||||
if(card.html){ const wrap=document.createElement('div'); wrap.innerHTML=card.html; c.appendChild(wrap); }
|
if(card.html){ const wrap=document.createElement('div'); wrap.innerHTML=card.html; c.appendChild(wrap); ensureTwitterWidgets(); }
|
||||||
// Summary control row
|
// Summary control row
|
||||||
const ctrl = document.createElement('div'); ctrl.style.marginTop='.25rem';
|
const ctrl = document.createElement('div'); ctrl.style.marginTop='.25rem';
|
||||||
const btn = document.createElement('button'); btn.type='button'; btn.title='Summarize this link'; btn.textContent='\u25B6'; btn.style.padding='0 .4rem'; btn.style.fontSize='.9rem';
|
const btn = document.createElement('button'); btn.type='button'; btn.title='Summarize this link'; btn.textContent='\u25B6'; btn.style.padding='0 .4rem'; btn.style.fontSize='.9rem';
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue