feat(cards): improve OG/Twitter parsing (more meta keys, absolute image URLs, larger body read limit, browser-like request headers); load Twitter widgets.js to render media
This commit is contained in:
parent
15f7f3ac96
commit
962469bd76
2 changed files with 69 additions and 38 deletions
|
|
@ -273,7 +273,9 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
|
|||
// fetch minimal HTML and extract tags using a tolerant HTML parser
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
req, _ := http.NewRequestWithContext(r.Context(), http.MethodGet, raw, nil)
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; sojuboy/1.0)")
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36")
|
||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8")
|
||||
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
w.WriteHeader(http.StatusBadGateway)
|
||||
|
|
@ -296,8 +298,8 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
|
|||
return
|
||||
}
|
||||
|
||||
// limit to 256KB and parse tokens
|
||||
limited := http.MaxBytesReader(w, resp.Body, 262144)
|
||||
// limit to 768KB and parse tokens
|
||||
limited := http.MaxBytesReader(w, resp.Body, 786432)
|
||||
doc, err := xhtml.Parse(limited)
|
||||
if err != nil {
|
||||
w.WriteHeader(http.StatusBadGateway)
|
||||
|
|
@ -307,36 +309,38 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
|
|||
var title, desc, img string
|
||||
var walker func(*xhtml.Node)
|
||||
walker = func(n *xhtml.Node) {
|
||||
if n.Type == xhtml.ElementNode && strings.EqualFold(n.Data, "meta") {
|
||||
// property or name + content
|
||||
var pn = ""
|
||||
var nm = ""
|
||||
var content = ""
|
||||
for _, a := range n.Attr {
|
||||
if strings.EqualFold(a.Key, "property") {
|
||||
pn = a.Val
|
||||
} else if strings.EqualFold(a.Key, "name") {
|
||||
nm = a.Val
|
||||
} else if strings.EqualFold(a.Key, "content") {
|
||||
content = a.Val
|
||||
if n.Type == xhtml.ElementNode {
|
||||
if strings.EqualFold(n.Data, "meta") {
|
||||
// property or name + content
|
||||
var pn = ""
|
||||
var nm = ""
|
||||
var content = ""
|
||||
for _, a := range n.Attr {
|
||||
if strings.EqualFold(a.Key, "property") {
|
||||
pn = a.Val
|
||||
} else if strings.EqualFold(a.Key, "name") {
|
||||
nm = a.Val
|
||||
} else if strings.EqualFold(a.Key, "content") {
|
||||
content = a.Val
|
||||
}
|
||||
}
|
||||
}
|
||||
key := strings.ToLower(pn)
|
||||
if key == "" {
|
||||
key = strings.ToLower(nm)
|
||||
}
|
||||
switch key {
|
||||
case "og:title", "twitter:title":
|
||||
if title == "" {
|
||||
title = content
|
||||
key := strings.ToLower(pn)
|
||||
if key == "" {
|
||||
key = strings.ToLower(nm)
|
||||
}
|
||||
case "og:description", "twitter:description":
|
||||
if desc == "" {
|
||||
desc = content
|
||||
}
|
||||
case "og:image", "twitter:image":
|
||||
if img == "" {
|
||||
img = content
|
||||
switch key {
|
||||
case "og:title", "twitter:title":
|
||||
if title == "" {
|
||||
title = content
|
||||
}
|
||||
case "og:description", "twitter:description", "description":
|
||||
if desc == "" {
|
||||
desc = content
|
||||
}
|
||||
case "og:image", "og:image:url", "og:image:secure_url", "twitter:image", "twitter:image:src":
|
||||
if img == "" {
|
||||
img = content
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -345,6 +349,21 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) {
|
|||
}
|
||||
}
|
||||
walker(doc)
|
||||
|
||||
// normalize image URL
|
||||
if img != "" {
|
||||
if strings.HasPrefix(img, "//") {
|
||||
if u.Scheme == "" {
|
||||
u.Scheme = "https"
|
||||
}
|
||||
img = u.Scheme + ":" + img
|
||||
} else if !strings.HasPrefix(img, "http://") && !strings.HasPrefix(img, "https://") {
|
||||
if ref, err := url.Parse(img); err == nil {
|
||||
img = u.ResolveReference(ref).String()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
card := linkCard{URL: raw, Title: strings.TrimSpace(title), Description: strings.TrimSpace(desc), Image: strings.TrimSpace(img)}
|
||||
// cache for 24h
|
||||
s.cardCache[raw] = card
|
||||
|
|
@ -371,8 +390,12 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
|
|||
_, _ = w.Write([]byte("summarizer not configured"))
|
||||
return
|
||||
}
|
||||
if s.summaryCache == nil { s.summaryCache = make(map[string]string) }
|
||||
if s.summaryCacheExp == nil { s.summaryCacheExp = make(map[string]time.Time) }
|
||||
if s.summaryCache == nil {
|
||||
s.summaryCache = make(map[string]string)
|
||||
}
|
||||
if s.summaryCacheExp == nil {
|
||||
s.summaryCacheExp = make(map[string]time.Time)
|
||||
}
|
||||
if exp, ok := s.summaryCacheExp[raw]; ok && time.Now().Before(exp) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{"summary": s.summaryCache[raw]})
|
||||
|
|
@ -380,8 +403,12 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
|
|||
}
|
||||
msgs := []store.Message{{Channel: "#links", Author: "link", Body: raw, Time: time.Now().UTC()}}
|
||||
tout := s.SummarizerTimeout
|
||||
if tout <= 0 { tout = 5 * time.Minute }
|
||||
if tout > 2*time.Minute { tout = 2 * time.Minute }
|
||||
if tout <= 0 {
|
||||
tout = 5 * time.Minute
|
||||
}
|
||||
if tout > 2*time.Minute {
|
||||
tout = 2 * time.Minute
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(r.Context(), tout)
|
||||
defer cancel()
|
||||
sum, err := s.Summarizer.Summarize(ctx, "#links", msgs, 0)
|
||||
|
|
@ -390,7 +417,9 @@ func (s *Server) handleLinkSummary(w http.ResponseWriter, r *http.Request) {
|
|||
_, _ = w.Write([]byte("summarizer error"))
|
||||
return
|
||||
}
|
||||
if sum == "" { sum = "(no summary)" }
|
||||
if sum == "" {
|
||||
sum = "(no summary)"
|
||||
}
|
||||
s.summaryCache[raw] = sum
|
||||
s.summaryCacheExp[raw] = time.Now().Add(24 * time.Hour)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue