Adjust parser due to source format change

This commit is contained in:
Arif Budiman
2025-10-20 18:36:22 -07:00
parent eebfcc8278
commit a8252e060c
3 changed files with 61 additions and 50 deletions

File diff suppressed because one or more lines are too long

View File

@@ -2,14 +2,13 @@ const dayjs = require('dayjs')
module.exports = { module.exports = {
site: 'www3.nhk.or.jp', site: 'www3.nhk.or.jp',
output: 'www3.nhk.or.jp.guide.xml',
days: 5, days: 5,
lang: 'en', lang: 'en',
delay: 5000, delay: 5000,
url: function ({ date }) { url: function ({ date }) {
return `https://nwapi.nhk.jp/nhkworld/epg/v7b/world/s${date.unix() * 1000}-e${ return `https://masterpl.hls.nhkworld.jp/epg/w/${date.toISOString().slice(0, 10).replace(/-/g, '')}.json`
date.add(1, 'd').unix() * 1000
}.json`
}, },
request: { request: {
@@ -26,21 +25,20 @@ module.exports = {
return context.channel.logo return context.channel.logo
}, },
parser: function (context) { async parser(context) {
const programs = [] const programs = []
const items = parseItems(context.content) const items = parseItems(context.content)
items.forEach(item => { for (let item of items) {
programs.push({ programs.push({
title: item.title, title: item.title,
start: parseStart(item), sub_title: item.episodeTitle,
stop: parseStop(item), start: dayjs(item.startTime, 'YYYY-MM-DDTHH:mm:ssZ'),
stop: dayjs(item.endTime, 'YYYY-MM-DDTHH:mm:ssZ'),
description: item.description, description: item.description,
image: parseImage(item), image: item.episodeThumbnailURL ? item.episodeThumbnailURL : item.thumbnail
sub_title: item.subtitle
}) })
}) }
return programs return programs
} }
@@ -49,20 +47,8 @@ module.exports = {
function parseItems(content) { function parseItems(content) {
if (content != '') { if (content != '') {
const data = JSON.parse(content) const data = JSON.parse(content)
return !data || !data.channel || !Array.isArray(data.channel.item) ? [] : data.channel.item return !data || !data.data || !Array.isArray(data.data) ? [] : data.data
} else { } else {
return [] return []
} }
} }
/**
 * Converts the `pubDate` field of a legacy EPG item into a dayjs value.
 *
 * @param {{ pubDate: string|number }} item - Schedule entry; `pubDate` is
 *   treated as a decimal epoch timestamp in milliseconds.
 * @returns {*} The value produced by `dayjs.unix` for the start time.
 */
function parseStart(item) {
  // Explicit radix: the timestamp is decimal, so never let a prefix such as
  // "0x" switch the parse to another base.
  const epochMillis = Number.parseInt(item.pubDate, 10)

  // dayjs.unix expects seconds, hence the division.
  return dayjs.unix(epochMillis / 1000)
}
/**
 * Converts the `endDate` field of a legacy EPG item into a dayjs value.
 *
 * @param {{ endDate: string|number }} item - Schedule entry; `endDate` is
 *   treated as a decimal epoch timestamp in milliseconds.
 * @returns {*} The value produced by `dayjs.unix` for the stop time.
 */
function parseStop(item) {
  // Explicit radix: the timestamp is decimal, so never let a prefix such as
  // "0x" switch the parse to another base.
  const epochMillis = Number.parseInt(item.endDate, 10)

  // dayjs.unix expects seconds, hence the division.
  return dayjs.unix(epochMillis / 1000)
}
/**
 * Builds the absolute image URL for a legacy EPG item.
 *
 * @param {{ thumbnail: string }} item - Schedule entry whose `thumbnail` is
 *   appended to the site origin.
 * @returns {string} The origin followed by the thumbnail value.
 */
function parseImage(item) {
  const origin = 'https://www.nhk.or.jp'
  const thumbnailPath = item.thumbnail

  return `${origin}${thumbnailPath}`
}

View File

@@ -1,9 +1,12 @@
const { url, parser } = require('./www3.nhk.or.jp.config.js') const { url, parser } = require('./www3.nhk.or.jp.config.js')
const fs = require('fs')
const path = require('path')
const dayjs = require('dayjs') const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc') const utc = require('dayjs/plugin/utc')
dayjs.extend(utc) dayjs.extend(utc)
const date = dayjs.utc('2023-04-29', 'YYYY-MM-DD').startOf('d') const date = dayjs.utc('2025-10-20', 'YYYY-MM-DD').startOf('d')
const channel = { const channel = {
site_id: '0', site_id: '0',
xmltv_id: 'NHKWorldJapan.jp', xmltv_id: 'NHKWorldJapan.jp',
@@ -11,33 +14,54 @@ const channel = {
logo: 'https://www3.nhk.or.jp/nhkworld/common/site_images/nw_webapp_1024x1024.png' logo: 'https://www3.nhk.or.jp/nhkworld/common/site_images/nw_webapp_1024x1024.png'
} }
// Read the saved schedule response once; the tests hand it to the parser
// together with the channel and date defined above.
const fixturePath = path.resolve(__dirname, '__data__/schedule.json')
const content = fs.readFileSync(fixturePath, 'utf8')
const context = { channel, content, date }
it('can generate valid url', () => { it('can generate valid url', () => {
expect(url({ channel, date })).toBe( expect(url({ date })).toBe(
'https://nwapi.nhk.jp/nhkworld/epg/v7b/world/s1682726400000-e1682812800000.json' 'https://masterpl.hls.nhkworld.jp/epg/w/20251020.json'
) )
}) })
it('can parse response', () => { it('can handle empty guide', async () => {
const content = const results = await parser({ content: '' })
'{"channel":{"item":[{"seriesId":"1007","airingId":"000","title":"NHK NEWSLINE","description":"NHK WORLD-JAPAN\'s flagship hourly news program delivers the latest world news, business and weather, with a focus on Japan and the rest of Asia.","link":"/nhkworld/en/news/","pubDate":"1682726400000","endDate":"1682727000000","vodReserved":false,"jstrm":"1","wstrm":"1","subtitle":"","content":"","content_clean":"","pgm_gr_id":"","thumbnail":"/nhkworld/upld/thumbnails/en/tv/regular_program/340aed63308aafd1178172abf6325231_large.jpg","thumbnail_s":"/nhkworld/upld/thumbnails/en/tv/regular_program/340aed63308aafd1178172abf6325231_small.jpg","showlist":"0","internal":"0","genre":{"TV":"11","Top":"","LC":""},"vod_id":"","vod_url":"","analytics":"[nhkworld]simul;NHK NEWSLINE;w02,001;1007-000-2023;2023-04-29T09:00:00+09:00"}]}}'
const results = parser({ content })
expect(results).toMatchObject([
{
title: 'NHK NEWSLINE',
start: dayjs(1682726400000),
stop: dayjs(1682727000000),
description:
"NHK WORLD-JAPAN's flagship hourly news program delivers the latest world news, business and weather, with a focus on Japan and the rest of Asia.",
image:
'https://www.nhk.or.jp/nhkworld/upld/thumbnails/en/tv/regular_program/340aed63308aafd1178172abf6325231_large.jpg',
sub_title: ''
}
])
})
it('can handle empty guide', () => {
const results = parser({ content: '' })
expect(results).toMatchObject([]) expect(results).toMatchObject([])
}) })
// Parses the shared fixture context and compares the first three programmes,
// in order, against the expected fields.
it('can parse response', async () => {
  const expectedPrograms = [
    {
      title: 'NHK NEWSLINE',
      sub_title: '',
      start: dayjs('2025-10-19T15:00:00.000Z'),
      stop: dayjs('2025-10-19T15:10:00.000Z'),
      description: 'NHK NEWSLINE brings you up to date with the latest from Japan, Asia and around the world. Our team covers breaking news and major developments, with trusted anchors to tie it all together.',
      image: ''
    },
    {
      title: 'J-MELO',
      sub_title: 'Furui Riho and shallm',
      start: dayjs('2025-10-19T15:10:00.000Z'),
      stop: dayjs('2025-10-19T15:38:00.000Z'),
      description: '*This program was first broadcast on April 13, 2025. \nJoin May J. for Japanese music! This week: Furui Riho (a singer-songwriter with gospel roots) and shallm (a band project from vocalist, lyricist, and composer lia).\nOn Demand until October 26, 2025',
      image: 'https://www3.nhk.or.jp/nhkworld/en/shows/2004445/images/wide_l_7eJOqZrlZQFF8GEPfH0emqOOlggwyC543Cv71Oou.jpg'
    },
    {
      title: 'INFO',
      sub_title: '',
      start: dayjs('2025-10-19T15:38:00.000Z'),
      stop: dayjs('2025-10-19T15:40:00.000Z'),
      description: ' ',
      image: ''
    }
  ]

  const results = await parser(context)

  // Same order as the schedule: index i of the results must match entry i.
  expectedPrograms.forEach((expected, index) => {
    expect(results[index]).toMatchObject(expected)
  })
})