From d92e90a93d9a58da59097920e6f98ddce7a7d965 Mon Sep 17 00:00:00 2001 From: GreenSnakeLinux Date: Thu, 3 Jul 2025 19:12:53 +0200 Subject: [PATCH] add new guide_tnt site --- sites/guidetnt.com/__data__/content.html | 1087 ++++++++++++++++++ sites/guidetnt.com/__data__/no_content.html | 390 +++++++ sites/guidetnt.com/guidetnt.com.channels.xml | 73 ++ sites/guidetnt.com/guidetnt.com.config.js | 333 ++++++ sites/guidetnt.com/guidetnt.com.test.js | 83 ++ sites/guidetnt.com/readme.md | 21 + 6 files changed, 1987 insertions(+) create mode 100644 sites/guidetnt.com/__data__/content.html create mode 100644 sites/guidetnt.com/__data__/no_content.html create mode 100644 sites/guidetnt.com/guidetnt.com.channels.xml create mode 100755 sites/guidetnt.com/guidetnt.com.config.js create mode 100644 sites/guidetnt.com/guidetnt.com.test.js create mode 100644 sites/guidetnt.com/readme.md diff --git a/sites/guidetnt.com/__data__/content.html b/sites/guidetnt.com/__data__/content.html new file mode 100644 index 00000000..0c17d1ba --- /dev/null +++ b/sites/guidetnt.com/__data__/content.html @@ -0,0 +1,1087 @@ + + + + + Programme TF1 - GuideTNT.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+
+ + +
+ + + +
+
+
+ + Instagram + +
+
+
+
+ + Twitter + +
+
+ + + + +
+
+ + + +
+ + + + +
+ + + + +
+ + + + + + + + + diff --git a/sites/guidetnt.com/__data__/no_content.html b/sites/guidetnt.com/__data__/no_content.html new file mode 100644 index 00000000..d8e4edd3 --- /dev/null +++ b/sites/guidetnt.com/__data__/no_content.html @@ -0,0 +1,390 @@ + + + + + Programme TF1 - GuideTNT.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+
+ + +
+ + + +
+
+
+ + Instagram + +
+
+
+
+ + Twitter + +
+
+ + + + +
+
+ + + +
+ + + + +
+ + + + +
+ + + + + + + + + diff --git a/sites/guidetnt.com/guidetnt.com.channels.xml b/sites/guidetnt.com/guidetnt.com.channels.xml new file mode 100644 index 00000000..42bac458 --- /dev/null +++ b/sites/guidetnt.com/guidetnt.com.channels.xml @@ -0,0 +1,73 @@ + + + TF1 + France 2 + France 3 + France 4 + France 5 + M6 + Arte + W9 + TMC + TFX + TF1 Séries Films + CSTAR + Chérie 25 + T18 + 6ter + RMC STORY + TV5MONDE + LCP / Public Senat + Gulli + Disney Channel + Canal+ Kids + AB1 + MTV + E! Entertainment + 13ème RUE + + Polar+ + Comedie+ + Comedy Central + BET + MCM + Paramount Channel + Warner TV + RTL9 + Syfy + Action + TCM + Canal+ + Canal+ Cinéma(s) + Canal+ Grand écran + Canal+ Box office + Canal+ Séries + Canal+ Docs + OCS + Cine+ Frisson + Cine+ Classic + Cine+ Festival + Cine+ Emotion + Cine+ Family + Histoire + Toute l'histoire + Crime District + Discovery Channel + Science&Vie TV + RMC Découverte + Planète+ + Planète+ Crime + Planète+ Aventure + National Geographic + Nat Geo Wild + Animaux + Ushuaia TV + Trek + L'Equipe + Eurosport 1 + Eurosport 2 + Automoto + Canal+ Sport + Canal+ Sport 360 + Canal+ Foot + diff --git a/sites/guidetnt.com/guidetnt.com.config.js b/sites/guidetnt.com/guidetnt.com.config.js new file mode 100755 index 00000000..9e00934c --- /dev/null +++ b/sites/guidetnt.com/guidetnt.com.config.js @@ -0,0 +1,333 @@ +const cheerio = require('cheerio') +const axios = require('axios') +const dayjs = require('dayjs') +const customParseFormat = require('dayjs/plugin/customParseFormat') +const utc = require('dayjs/plugin/utc') +const timezone = require('dayjs/plugin/timezone') +require('dayjs/locale/fr') + +dayjs.extend(customParseFormat) +dayjs.extend(utc) +dayjs.extend(timezone) + +const PARIS_TZ = 'Europe/Paris' + +module.exports = { + site: 'guidetnt.com', + days: 2, + url({ channel, date }) { + const now = dayjs() + const demain = now.add(1, 'd') + if (date && date.isSame(demain, 'day')) { + return `https://www.guidetnt.com/tv-demain/programme-${channel.site_id}` + } else if (!date || date.isSame(now, 'day')) { + return `https://www.guidetnt.com/tv/programme-${channel.site_id}` + } else { + return null + } + }, + async parser({ content, date }) { + const programs = [] + const allItems = parseItems(content) + const items = allItems?.rows + const itemDate = allItems?.formattedDate + for (const item of items) { + const prev = programs[programs.length - 1] + const $item = cheerio.load(item) + const title = parseTitle($item) + let start = parseStart($item, itemDate) + + if (!start || !title) return + if (prev) { + if (start.isBefore(prev.start)) { + start = start.add(1, 'd') + date = date.add(1, 'd') + } + prev.stop = start + } + let stop = start.add(30, 'm') + + let itemDetails = null + let subTitle = null + //let duration = null + let country = null + let productionDate = null + let episode = null + let season = null + let category = parseCategory($item) + let description = parseDescription($item) + const itemDetailsURL = parseDescriptionURL($item) + if(itemDetailsURL) { + const url = 'https://www.guidetnt.com' + itemDetailsURL + try { + const response = await axios.get(url) + itemDetails = parseItemDetails(response.data) + } catch (err) { + console.error(`Erreur lors du fetch des détails pour l'item: ${url}`, err) + } + + const timeRange = parseTimeRange(itemDetails?.programHour, date.format('YYYY-MM-DD')) + start = timeRange?.start + stop = timeRange?.stop + + subTitle = itemDetails?.subTitle + if (title == subTitle) subTitle = null + description = itemDetails?.description + + const categoryDetails = parseCategoryText(itemDetails?.category) + //duration = categoryDetails?.duration + country = categoryDetails?.country + productionDate = categoryDetails?.productionDate + season = categoryDetails?.season + episode = categoryDetails?.episode + } + // See https://www.npmjs.com/package/epg-parser for parameters + programs.push({ + title, + subTitle: subTitle, + description: description, + image: itemDetails?.image, + category: category, + directors: itemDetails?.directorActors?.Réalisateur, + actors: itemDetails?.directorActors?.Acteur, + country: country, + date: productionDate, + //duration: duration, // Tried with length: too, but does not work ! (stop-start is not accurate because of Ads) + season: season, + episode: episode, + start, + stop + }) + } + + return programs + }, + async channels() { + const response = await axios.get('https://www.guidetnt.com') + const channels = [] + const $ = cheerio.load(response.data) + + // Look inside each .tvlogo container + $('.tvlogo').each((i, el) => { + // Find all descendants that have an alt attribute + $(el).find('[alt]').each((j, subEl) => { + const alt = $(subEl).attr('alt') + const href = $(subEl).attr('href') + if (href && alt && alt.trim() !== '') { + const name = alt.trim() + const site_id = href.replace(/^\/tv\/programme-/, '') + channels.push({ + lang: 'fr', + name, + site_id + }) + } + }) + }) + return channels + } +} + +function parseTimeRange(timeRange, baseDate) { + // Split times + const [startStr, endStr] = timeRange.split(' - ').map(s => s.trim()) + + // Parse with base date + const start = dayjs(`${baseDate} ${startStr}`, 'YYYY-MM-DD HH:mm') + let end = dayjs(`${baseDate} ${endStr}`, 'YYYY-MM-DD HH:mm') + + // Handle possible day wrap (e.g., 23:30 - 00:15) + if (end.isBefore(start)) { + end = end.add(1, 'day') + } + + // Calculate duration in minutes + const diffMinutes = end.diff(start, 'minute') + + return { + start: start.format(), + stop: end.format(), + duration: diffMinutes + } +} + +function parseItemDetails(itemDetails) { + const $ = cheerio.load(itemDetails) + + const program = $('.program-wrapper').first() + + const programHour = program.find('.program-hour').text().trim() + const programTitle = program.find('.program-title').text().trim() + const programElementBold = program.find('.program-element-bold').text().trim() + const programArea1 = program.find('.program-element.program-area-1').text().trim() + + let description = '' + const programElements = $('.program-element').filter((i, el) => { + const classAttr = $(el).attr('class') + // Return true only if it is exactly "program-element" (no extra classes) + return classAttr.trim() === 'program-element' + }) + + programElements.each((i, el) => { + description += $(el).text().trim() + }) + + const area2Node = $('.program-area-2').first() + const area2 = $(area2Node) + const data = {} + let currentLabel = null + let texts = [] + + area2.contents().each((i, node) => { + if (node.type === 'tag' && node.name === 'strong') { + // If we had collected some text for the previous label, save it + if (currentLabel && texts.length) { + data[currentLabel] = texts.join('').trim().replace(/,\s*$/, '') // Remove trailing comma + } + // New label - get text without colon + currentLabel = $(node).text().replace(/:$/, '').trim() + texts = [] + } else if (currentLabel) { + // Append the text content (text node or others) + if (node.type === 'text') { + texts.push(node.data) + } else if (node.type === 'tag' && node.name !== 'strong' && node.name !== 'br') { + texts.push($(node).text()) + } + } + }) + + // Save last label text + if (currentLabel && texts.length) { + data[currentLabel] = texts.join('').trim().replace(/,\s*$/, '') + } + + const imgSrc = program.find('div[style*="float:left"]')?.find('img')?.attr('src') || null + + return { + programHour, + title: programTitle, + subTitle: programElementBold, + category: programArea1, + description: description, + directorActors: data, + image: imgSrc + } +} + +function parseCategoryText(text) { + if (!text) return null + + const parts = text.split(',').map(s => s.trim()).filter(Boolean) + const len = parts.length + + const category = parts[0] || null + + if (len < 3) { + return { + category: category, + duration: null, + country: null, + productionDate: null, + season: null, + episode: null + } + } + + // Check last part: date if numeric + const dateCandidate = parts[len - 1] + const productionDate = /^\d{4}$/.test(dateCandidate) ? dateCandidate : null + + // Check for duration (first part containing "minutes") + let durationMinute = null + //let duration = null + let episode = null + let season = null + let durationIndex = -1 + for (let i = 0; i < len; i++) { + if (parts[i].toLowerCase().includes('minute')) { + durationMinute = parts[i].trim() + durationMinute = durationMinute.replace('minutes', '') + durationMinute = durationMinute.replace('minute', '') + //duration = [{ units: 'minutes', value: durationMinute }], + durationIndex = i + } else if (parts[i].toLowerCase().includes('épisode')) { + const match = text.match(/épisode\s+(\d+)(?:\/(\d+))?/i) + if (match) { + episode = parseInt(match[1], 10) + } + } else if (parts[i].toLowerCase().includes('saison')) { + season = parts[i].replace('saison', '').trim() + } + } + + // Country: second to last + const countryIndex = len - 2 + let country = (durationIndex === countryIndex) ? null : parts[countryIndex] + + return { + category, + durationMinute, + country, + productionDate, + season, + episode + } +} + +function parseTitle($item) { + return $item('.channel-programs-title a').text().trim() +} + +function parseDescription($item) { + return $item('#descr').text().trim() || null +} + +function parseDescriptionURL($item) { + const descrLink = $item('#descr a') + return descrLink.attr('href') || null +} + +function parseCategory($item) { + let type = null + $item('.channel-programs-title span').each((i, span) => { + const className = $item(span).attr('class') + if (className && className.startsWith('text_bg')) { + type = $item(span).text().trim() + } + }) + return type +} + +function parseStart($item, itemDate) { + const dt = $item('.channel-programs-time a').text().trim() + if (!dt) return null + + const datetimeStr = `${itemDate} ${dt}` + return dayjs.tz(datetimeStr, 'YYYY-MM-DD HH:mm', PARIS_TZ) +} + +function parseItems(content) { + const $ = cheerio.load(content) + + // Extract header information + const logoSrc = $('#logo img').attr('src') + const title = $('#title h1').text().trim() + const subtitle = $('#subtitle').text().trim() + const dateMatch = subtitle.match(/(\d{1,2} \w+ \d{4})/) + const dateStr = dateMatch ? dateMatch[1].toLowerCase() : null + + // Parse the French date string + const parsedDate = dayjs(dateStr, 'D MMMM YYYY', 'fr') + // Format it as YYYY-MM-DD + const formattedDate = parsedDate.format('YYYY-MM-DD') + + const rows = $('.channel-row').toArray() + + return { + rows, + logoSrc, + title, + formattedDate + } +} \ No newline at end of file diff --git a/sites/guidetnt.com/guidetnt.com.test.js b/sites/guidetnt.com/guidetnt.com.test.js new file mode 100644 index 00000000..0ee3906a --- /dev/null +++ b/sites/guidetnt.com/guidetnt.com.test.js @@ -0,0 +1,83 @@ +const { parser, url } = require('./guidetnt.com.config.js') +const fs = require('fs') +const path = require('path') +const dayjs = require('dayjs') +const utc = require('dayjs/plugin/utc') +const customParseFormat = require('dayjs/plugin/customParseFormat') +const timezone = require('dayjs/plugin/timezone') +require('dayjs/locale/fr') +dayjs.extend(customParseFormat) +dayjs.extend(utc) +dayjs.extend(timezone) + +const date = dayjs.utc('2025-07-01', 'YYYY-MM-DD').startOf('d') +const channel = { + site_id: 'tf1', + xmltv_id: 'TF1.fr' +} + +it('can generate valid url', () => { + expect(url({ channel })).toBe('https://www.guidetnt.com/tv/programme-tf1') +}) + +it('can parse response', async () => { + const content = fs.readFileSync(path.resolve(__dirname, '__data__/content.html')) + let results = await parser({ content, date }) + results = results.map(p => { + p.start = p.start.toJSON() + p.stop = p.stop.toJSON() + return p + }) + + expect(results.length).toBe(29) + expect(results[0]).toMatchObject({ + category: 'Série', + description: 'Grande effervescence pour toute l\'équipe du Camping Paradis, qui prépare les Olympiades. Côté arrivants, Hélène et sa fille Eva viennent passer quelques jours dans le but d\'optimiser les révisions d\'E...', + start: '2025-06-30T22:55:00.000Z', + stop: '2025-06-30T23:45:00.000Z', + title: 'Camping Paradis' + }) + expect(results[2]).toMatchObject({ + category: 'Magazine', + description: 'Retrouvez tous vos programmes de nuit.', + start: '2025-07-01T00:55:00.000Z', + stop: '2025-07-01T04:00:00.000Z', + title: 'Programmes de la nuit' + }) + expect(results[15]).toMatchObject({ + category: 'Téléfilm', + description: 'La vie quasi parfaite de Riley bascule brutalement lorsqu\'un accident de voiture lui coûte la vie, laissant derrière elle sa famille. Alors que l\'enquête débute, l\'affaire prend une tournure étrange l...', + start: '2025-07-01T12:25:00.000Z', + stop: '2025-07-01T14:00:00.000Z', + title: 'Trahie par l\'amour' + }) +}) + +it('can parse response for current day', async () => { + const content = fs.readFileSync(path.resolve(__dirname, '__data__/content.html')) + let results = await parser({ content, date: dayjs.utc('2025-07-01', 'YYYY-MM-DD').startOf('d') }) + results = results.map(p => { + p.start = p.start.toJSON() + p.stop = p.stop.toJSON() + return p + } + ) + + expect(results.length).toBe(29) + expect(results[0]).toMatchObject({ + category: 'Série', + description: 'Grande effervescence pour toute l\'équipe du Camping Paradis, qui prépare les Olympiades. Côté arrivants, Hélène et sa fille Eva viennent passer quelques jours dans le but d\'optimiser les révisions d\'E...', + start: '2025-06-30T22:55:00.000Z', + stop: '2025-06-30T23:45:00.000Z', + title: 'Camping Paradis' + }) +}) + +it('can handle empty guide', async () => { + const results = await parser({ + date, + content: fs.readFileSync(path.resolve(__dirname, '__data__/no_content.html')) + }) + + expect(results).toEqual([]) +}) diff --git a/sites/guidetnt.com/readme.md b/sites/guidetnt.com/readme.md new file mode 100644 index 00000000..e9f8a06a --- /dev/null +++ b/sites/guidetnt.com/readme.md @@ -0,0 +1,21 @@ +# guidetnt.com + +https://www.guidetnt.com/ + +### Download the guide + +```sh +npm run grab --- --site=guidetnt.com +``` + +### Update channel list + +```sh +npm run channels:parse --- --config=./sites/guidetnt.com/guidetnt.com.config.js --output=./sites/guidetnt.com/guidetnt.com.channels.xml +``` + +### Test + +```sh +npm test --- guidetnt.com +```