From 8c67ce27b538fc55f78b862270d7cfc4775e852b Mon Sep 17 00:00:00 2001 From: Thomas Cravey Date: Sat, 16 Aug 2025 19:54:18 -0500 Subject: [PATCH] fix(ui/og): robust OG/Twitter parsing via html tokenizer; keep navbar on summarizer; make tail pane scrollable; stub loadInfo; channel dropdown to be added; SSE broadcast already wired --- internal/httpapi/server.go | 53 +++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/internal/httpapi/server.go b/internal/httpapi/server.go index a9d3d75..9daf4ec 100644 --- a/internal/httpapi/server.go +++ b/internal/httpapi/server.go @@ -16,6 +16,7 @@ import ( "sojuboy/internal/store" "sojuboy/internal/summarizer" + xhtml "golang.org/x/net/html" ) type Metrics struct { @@ -217,45 +218,37 @@ func (s *Server) handleLinkCard(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(s.cardCache[raw]) return } - // fetch minimal HTML and extract tags (very lightweight, no full readability here) - // For brevity, we only parse a few tags by string search to keep dependencies minimal in this step + // fetch minimal HTML and extract tags using a tolerant HTML parser client := &http.Client{ Timeout: 10 * time.Second } req, _ := http.NewRequestWithContext(r.Context(), http.MethodGet, raw, nil) resp, err := client.Do(req) if err != nil { w.WriteHeader(http.StatusBadGateway); _, _ = w.Write([]byte("fetch error")); return } defer resp.Body.Close() if resp.StatusCode < 200 || resp.StatusCode >= 300 { w.WriteHeader(http.StatusBadGateway); _, _ = w.Write([]byte("bad status")); return } - // limit to 256KB + // limit to 256KB and parse tokens limited := http.MaxBytesReader(w, resp.Body, 262144) - b, _ := io.ReadAll(limited) - html := string(b) - // naive meta parsing - get := func(names ...string) string { - for _, n := range names { - // look for content="..." - idx := strings.Index(strings.ToLower(html), strings.ToLower(n)) - if idx >= 0 { - // slice forward - sfx := html[idx:] - ic := strings.Index(strings.ToLower(sfx), "content=") - if ic >= 0 { - sfx = sfx[ic+8:] - // trim quotes - if len(sfx) > 0 && (sfx[0] == '"' || sfx[0] == '\'') { - q := sfx[0] - sfx = sfx[1:] - iq := strings.IndexByte(sfx, q) - if iq >= 0 { return strings.TrimSpace(sfx[:iq]) } - } - } + doc, err := xhtml.Parse(limited) + if err != nil { w.WriteHeader(http.StatusBadGateway); _, _ = w.Write([]byte("parse error")); return } + var title, desc, img string + var walker func(*xhtml.Node) + getAttr := func(n *xhtml.Node, key string) string { for a := n.Attr; a != nil && len(n.Attr) > 0; a = nil { for _, at := range n.Attr { if strings.EqualFold(at.Key, key) { return at.Val } } ; return "" } } + walker = func(n *xhtml.Node) { + if n.Type == xhtml.ElementNode && strings.EqualFold(n.Data, "meta") { + // property or name + content + var pn = ""; var nm = ""; var content = "" + for _, a := range n.Attr { if strings.EqualFold(a.Key, "property") { pn = a.Val } else if strings.EqualFold(a.Key, "name") { nm = a.Val } else if strings.EqualFold(a.Key, "content") { content = a.Val } } + key := strings.ToLower(pn) + if key == "" { key = strings.ToLower(nm) } + switch key { + case "og:title", "twitter:title": if title == "" { title = content } + case "og:description", "twitter:description": if desc == "" { desc = content } + case "og:image", "twitter:image": if img == "" { img = content } } } - return "" + for c := n.FirstChild; c != nil; c = c.NextSibling { walker(c) } } - card := linkCard{ URL: raw } - card.Title = get("property=\"og:title\"","name=\"og:title\"","name=\"twitter:title\"") - card.Description = get("property=\"og:description\"","name=\"og:description\"","name=\"twitter:description\"") - card.Image = get("property=\"og:image\"","name=\"og:image\"","name=\"twitter:image\"") + walker(doc) + card := linkCard{ URL: raw, Title: strings.TrimSpace(title), Description: strings.TrimSpace(desc), Image: strings.TrimSpace(img) } // cache for 24h s.cardCache[raw] = card s.cardCacheExp[raw] = time.Now().Add(24 * time.Hour) @@ -393,7 +386,7 @@ func (s *Server) handleUI(w http.ResponseWriter, r *http.Request) {
-
+