move html content to specific folder, remove yarn.lock

This commit is contained in:
theofficialomega
2025-07-10 11:03:36 +02:00
parent adba8923f3
commit 3340661179
2 changed files with 110 additions and 56 deletions

View File

@@ -1,28 +1,80 @@
const axios = require('axios')
const cheerio = require('cheerio')
const url = require('url')
const { DateTime } = require('luxon')
// Module-level cache for the CSRF token; getToken() reads it and overwrites it after a successful fetch.
let cachedToken = null
// Moment after which cachedToken is no longer reused; getToken() sets it to one hour past the fetch.
let tokenExpiry = null
/**
 * Returns the CSRF token used for requests to tv.dir.bg.
 *
 * A previously fetched token is reused while the current time is still before
 * `tokenExpiry`. Otherwise `https://tv.dir.bg/init` is requested and the
 * `csrfToken` property of the response body is cached for one hour.
 *
 * Failures are logged with `console.error` and never thrown from the request
 * path: the function resolves to `null` both when the request fails and when
 * the response body carries no `csrfToken`.
 *
 * @returns {Promise<string|null>} the token, or null when none could be obtained
 */
async function getToken() {
  const hasFreshToken = Boolean(cachedToken && tokenExpiry && DateTime.now() < tokenExpiry)
  if (hasFreshToken) {
    return cachedToken
  }

  try {
    const response = await axios.get('https://tv.dir.bg/init')
    // Only the top-level `csrfToken` field of the body is consulted.
    const freshToken = response.data?.csrfToken || null

    if (!freshToken) {
      // Log the keys that were present to help diagnose a changed response shape.
      console.error('CSRF token not found in response structure:', Object.keys(response.data || {}))
      return null
    }

    cachedToken = freshToken
    tokenExpiry = DateTime.now().plus({ hours: 1 })
    return freshToken
  } catch (error) {
    console.error('Error fetching token:', error.message)
    return null
  }
}
module.exports = {
site: 'tv.dir.bg',
days: 2,
url({ channel, date }) {
return `https://tv.dir.bg/tv_channel.php?id=${channel.site_id}&dd=${date.format('DD.MM')}`
async url({ channel, date }) {
const token = await getToken()
if (!token) {
throw new Error('Unable to retrieve CSRF token')
}
const form = new url.URLSearchParams({
_token: token,
channel: channel.site_id,
day: date.format('YYYY-MM-DD')
})
return axios.post('https://tv.dir.bg/load/programs', form.toString(), {
headers: {
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'X-Requested-With': 'XMLHttpRequest'
}
})
},
parser({ content, date }) {
const programs = []
const items = parseItems(content)
items.forEach(item => {
const $item = cheerio.load(item)
const prev = programs[programs.length - 1]
let start = parseStart($item, date)
if (!start) return
if (prev) {
if (start < prev.start) {
start = start.plus({ days: 1 })
date = date.add(1, 'd')
date = date.plus({ days: 1 })
}
prev.stop = start
}
const stop = start.plus({ minutes: 30 })
programs.push({
title: parseTitle($item),
@@ -34,55 +86,57 @@ module.exports = {
return programs
},
async channels() {
const requests = [
axios.get('https://tv.dir.bg/programata.php?t=0'),
axios.get('https://tv.dir.bg/programata.php?t=1')
]
try {
const response = await axios.get('https://tv.dir.bg/channels')
const $ = cheerio.load(response.data)
const items = await Promise.all(requests)
.then(r => {
return r
.map(i => {
const html = i.data
const $ = cheerio.load(html)
return $('#programa-left > div > div > div > a').toArray()
})
.reduce((acc, curr) => {
acc = acc.concat(curr)
return acc
}, [])
})
.catch(console.log)
const channels = []
const $ = cheerio.load('')
return items.map(item => {
const $item = $(item)
return {
$('.channel_cont').each((index, element) => {
const $element = $(element)
const $link = $element.find('a.channel_link')
const href = $link.attr('href')
const $img = $element.find('img')
const name = $img.attr('alt')
const logo = $img.attr('src')
const site_id = href ? href.match(/\/programa\/(\d+)/)?.[1] : ''
if (site_id && name) {
channels.push({
lang: 'bg',
site_id: $item.attr('href').replace('tv_channel.php?id=', ''),
name: $item.find('div.thumbnail > img').attr('alt')
}
site_id: site_id,
name: name,
logo: logo
})
}
})
return channels
} catch (error) {
console.error('Error fetching channels:', error)
return []
}
}
}
/**
 * Derives a programme start time from a single schedule item.
 *
 * NOTE(review): this body contains two complete implementations back to back
 * (two selectors, two return statements). It looks like a diff rendering with
 * the removed and added lines interleaved — confirm against the real file
 * before relying on either path.
 *
 * @param {Function} $item - cheerio instance loaded with one item's markup
 * @param {object} date - day being parsed; one variant calls `date.format()`,
 *   the other reads `date.isValid` and calls `.set()`
 * @returns {object|null} the start time, or null when the item has no time text
 */
function parseStart($item, date) {
// Variant 1: the time is the text of the item's <i> element.
const time = $item('i').text()
if (!time) return null
const dateString = `${date.format('MM/DD/YYYY')} ${time}`
// Variant 2: the time is the trimmed text of the '.broadcast-time' element.
const timeText = $item('.broadcast-time').text().trim()
if (!timeText) return null
// Variant 1 result: parse "MM/dd/yyyy HH.mm" in the Europe/Sofia zone, then convert to UTC.
return DateTime.fromFormat(dateString, 'MM/dd/yyyy HH.mm', { zone: 'Europe/Sofia' }).toUTC()
// As written, everything below the return above is unreachable.
// Variant 2 result: split "H:M" text into numeric hour and minute.
const [hours, minutes] = timeText.split(':').map(Number)
// Use `date` directly when its `isValid` property is truthy; otherwise parse it as an ISO string.
const dateTime = date.isValid ? date : DateTime.fromISO(date)
return dateTime.set({ hour: hours, minute: minutes, second: 0, millisecond: 0 })
}
/**
 * Extracts the programme title from a single schedule item.
 *
 * NOTE(review): two return statements appear in sequence, which looks like
 * removed and added diff lines interleaved — confirm which one the real file keeps.
 *
 * @param {Function} $item - cheerio instance loaded with one item's markup
 * @returns {string} the trimmed title text
 */
function parseTitle($item) {
// Variant 1: the item's full text with a leading "dd?dd" time prefix removed
// (the `.` in the pattern is unescaped, so it matches any separator character).
return $item
.text()
.replace(/^\d{2}.\d{2}/, '')
.trim()
// Variant 2 (unreachable as written): trimmed text of the '.broadcast-title' element.
return $item('.broadcast-title').text().trim()
}
/**
 * Loads the response markup with cheerio and returns the schedule item
 * elements as a plain array.
 *
 * NOTE(review): two return statements appear in sequence, which looks like
 * removed and added diff lines interleaved — confirm which selector the real file uses.
 *
 * @param {string|Buffer} content - markup of the schedule response
 * @returns {Array} matched elements
 */
function parseItems(content) {
const $ = cheerio.load(content)
// Variant 1: direct <li> children of the '#events' element.
return $('#events > li').toArray()
// Variant 2 (unreachable as written): every element with class 'broadcast-item'.
return $('.broadcast-item').toArray()
}

View File

@@ -1,23 +1,24 @@
const { parser, url } = require('./tv.dir.bg.config.js')
const fs = require('fs')
const path = require('path')
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
const customParseFormat = require('dayjs/plugin/customParseFormat')
dayjs.extend(customParseFormat)
dayjs.extend(utc)
const date = dayjs.utc('2022-01-20', 'YYYY-MM-DD').startOf('d')
const date = dayjs.utc('2025-06-30', 'YYYY-MM-DD').startOf('d')
const channel = {
site_id: '12',
xmltv_id: 'BTV.bg'
}
// Checks the value the config's url() produces for the sample channel and date.
// NOTE(review): two expectations with different target URLs appear here, which
// looks like removed and added diff lines interleaved. Also, the `async url()`
// variant shown in the config section of this source returns the result of
// axios.post(...), so a `toBe` comparison against a string may not hold for it — confirm.
it('can generate valid url', () => {
expect(url({ channel, date })).toBe('https://tv.dir.bg/tv_channel.php?id=12&dd=20.01')
expect(url({ channel, date })).toBe('https://tv.dir.bg/programa/12')
})
it('can parse response', () => {
const content =
'<!DOCTYPE html><html><head></head><body><div class="container" id="news"><div class="row"><div class="col-sm-12 col-md-5"><ul id="events"><li><i></i> <div class="progress"> <div class="progress-bar progress-bar-striped active" role="progressbar" style="width:99%"> </div></div></li><li><a href="tv_show_info.php?id"="10"><i>06.00</i>„<b>Тази сутрин</b>” - информационно предаване с водещи Златимир Йочеви Биляна Гавазова</a></li><li><i>15.00</i>„Доктор Чудо” - сериал, еп.71</li><li><a href="tv_show_info.php?id"="1601"><i>05.30</i>„<b>Лице в лице</b>” /п./ </a></li></ul></div></div></div></body></html>'
const content = fs.readFileSync(path.resolve(__dirname, '__data__/content.html'))
const result = parser({ content, date }).map(p => {
p.start = p.start.toJSON()
p.stop = p.stop.toJSON()
@@ -26,19 +27,19 @@ it('can parse response', () => {
expect(result).toMatchObject([
{
start: '2022-01-20T04:00:00.000Z',
stop: '2022-01-20T13:00:00.000Z',
title: '„Тази сутрин” - информационно предаване с водещи Златимир Йочеви Биляна Гавазова'
start: '2025-06-30T08:00:00.000Z',
stop: '2025-06-30T10:00:00.000Z',
title: 'Купа на Франция: Еспали - Пари Сен Жермен'
},
{
start: '2022-01-20T13:00:00.000Z',
stop: '2022-01-21T03:30:00.000Z',
title: '„Доктор Чудо” - сериал, еп.71'
start: '2025-06-30T10:00:00.000Z',
stop: '2025-06-30T12:00:00.000Z',
title: 'Ла Лига: Леганес - Реал Сосиедад'
},
{
start: '2022-01-21T03:30:00.000Z',
stop: '2022-01-21T04:00:00.000Z',
title: '„Лице в лице” /п./'
start: '2025-06-30T12:00:00.000Z',
stop: '2025-06-30T13:00:00.000Z',
title: 'Пред Стадиона&quot; - спортно шоу'
}
])
})
@@ -47,8 +48,7 @@ it('can handle empty guide', () => {
const result = parser({
date,
channel,
content:
'<!DOCTYPE html><html><head></head><body><div class="container" id="news"><div class="row"><div class="col-sm-12 col-md-5"><ul id="events"></ul></div></div></div></body></html>'
content: fs.readFileSync(path.resolve(__dirname, '__data__/no_data.html'))
})
expect(result).toMatchObject([])
})