feat: two-pass merge with first-source-wins

This commit is contained in:
2026-05-06 11:12:31 +02:00
parent 1c7de7174d
commit b592a0e229
2 changed files with 345 additions and 0 deletions
+226
View File
@@ -0,0 +1,226 @@
package main
import (
"compress/gzip"
"context"
"encoding/xml"
"fmt"
"io"
"net/http"
"os"
"strings"
)
// fetchEPGSource downloads url into dest as plain XML, transparently
// gunzipping if the URL ends in .gz or the response Content-Type indicates
// gzip. Streaming — the body never lives fully in memory.
func fetchEPGSource(ctx context.Context, client *http.Client, url, dest string) error {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return err
}
resp, err := client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("epg fetch %s: %s", url, resp.Status)
}
src := io.Reader(resp.Body)
if responseLooksGzipped(url, resp) {
gr, err := gzip.NewReader(resp.Body)
if err != nil {
return fmt.Errorf("epg fetch %s: gzip: %w", url, err)
}
defer gr.Close()
src = gr
}
f, err := os.Create(dest)
if err != nil {
return err
}
defer f.Close()
if _, err := io.Copy(f, src); err != nil {
return err
}
return f.Sync()
}
func responseLooksGzipped(url string, resp *http.Response) bool {
base := strings.ToLower(strings.SplitN(url, "?", 2)[0])
if strings.HasSuffix(base, ".gz") {
return true
}
if strings.Contains(strings.ToLower(resp.Header.Get("Content-Type")), "gzip") {
return true
}
return false
}
// mergeEPG writes a merged XMLTV stream to dst, drawing from the given
// sources. Channels not covered by tvgIDMap are dropped; matched channels
// have their ids rewritten to the canonical (playlist) form. When multiple
// sources cover the same channel, the first one wins and programmes from
// other sources for that channel are dropped.
//
// Each source is read twice (channels, then programmes), which is why the
// argument is io.ReadSeeker rather than io.Reader.
func mergeEPG(sources []io.ReadSeeker, tvgIDMap map[string]string, dst io.Writer) error {
if _, err := io.WriteString(dst, xml.Header); err != nil {
return err
}
enc := xml.NewEncoder(dst)
tv := xml.StartElement{
Name: xml.Name{Local: "tv"},
Attr: []xml.Attr{{Name: xml.Name{Local: "generator-info-name"}, Value: "pz8-relay"}},
}
if err := enc.EncodeToken(tv); err != nil {
return err
}
ownership := make(map[string]int)
for i, src := range sources {
if _, err := src.Seek(0, io.SeekStart); err != nil {
return err
}
if err := emitChannels(src, enc, tvgIDMap, ownership, i); err != nil {
return fmt.Errorf("source %d channels: %w", i, err)
}
}
for i, src := range sources {
if _, err := src.Seek(0, io.SeekStart); err != nil {
return err
}
if err := emitProgrammes(src, enc, tvgIDMap, ownership, i); err != nil {
return fmt.Errorf("source %d programmes: %w", i, err)
}
}
if err := enc.EncodeToken(tv.End()); err != nil {
return err
}
return enc.Close()
}
func emitChannels(r io.Reader, enc *xml.Encoder, tvgIDMap map[string]string, ownership map[string]int, sourceIdx int) error {
dec := xml.NewDecoder(r)
for {
tok, err := dec.Token()
if err == io.EOF {
return nil
}
if err != nil {
return err
}
start, ok := tok.(xml.StartElement)
if !ok || start.Name.Local != "channel" {
continue
}
idIdx := findAttr(start.Attr, "id")
if idIdx < 0 {
if err := dec.Skip(); err != nil {
return err
}
continue
}
canonical, matched := tvgIDMap[normalizeChannelID(start.Attr[idIdx].Value)]
if !matched {
if err := dec.Skip(); err != nil {
return err
}
continue
}
if _, taken := ownership[canonical]; taken {
if err := dec.Skip(); err != nil {
return err
}
continue
}
ownership[canonical] = sourceIdx
start.Attr[idIdx].Value = canonical
if err := copyElement(dec, enc, start); err != nil {
return err
}
}
}
func emitProgrammes(r io.Reader, enc *xml.Encoder, tvgIDMap map[string]string, ownership map[string]int, sourceIdx int) error {
dec := xml.NewDecoder(r)
for {
tok, err := dec.Token()
if err == io.EOF {
return nil
}
if err != nil {
return err
}
start, ok := tok.(xml.StartElement)
if !ok || start.Name.Local != "programme" {
continue
}
chIdx := findAttr(start.Attr, "channel")
if chIdx < 0 {
if err := dec.Skip(); err != nil {
return err
}
continue
}
canonical, matched := tvgIDMap[normalizeChannelID(start.Attr[chIdx].Value)]
if !matched {
if err := dec.Skip(); err != nil {
return err
}
continue
}
if owner, ok := ownership[canonical]; !ok || owner != sourceIdx {
if err := dec.Skip(); err != nil {
return err
}
continue
}
start.Attr[chIdx].Value = canonical
if err := copyElement(dec, enc, start); err != nil {
return err
}
}
}
func findAttr(attrs []xml.Attr, name string) int {
for i, a := range attrs {
if a.Name.Local == name {
return i
}
}
return -1
}
// copyElement emits start and every token up to its matching end element,
// using depth tracking to handle nested elements.
func copyElement(dec *xml.Decoder, enc *xml.Encoder, start xml.StartElement) error {
if err := enc.EncodeToken(start); err != nil {
return err
}
depth := 1
for depth > 0 {
tok, err := dec.Token()
if err != nil {
return err
}
if err := enc.EncodeToken(tok); err != nil {
return err
}
switch tok.(type) {
case xml.StartElement:
depth++
case xml.EndElement:
depth--
}
}
return nil
}