diff --git a/scripts/commands/epg/grab.ts b/scripts/commands/epg/grab.ts
index 0b90bc19..c111a2bf 100644
--- a/scripts/commands/epg/grab.ts
+++ b/scripts/commands/epg/grab.ts
@@ -1,15 +1,19 @@
 import { Logger, Timer, Collection, Template } from '@freearhey/core'
 import epgGrabber, { EPGGrabber, EPGGrabberMock } from 'epg-grabber'
-import { loadJs, parseProxy, SiteConfig, Queue } from '../../core'
+import { CurlBody } from 'curl-generator/dist/bodies/body'
+import { loadJs, parseProxy, Queue } from '../../core'
 import { Channel, Guide, Program } from '../../models'
 import { SocksProxyAgent } from 'socks-proxy-agent'
+import defaultConfig from '../../default.config'
 import { PromisyClass, TaskQueue } from 'cwait'
 import { Storage } from '@freearhey/storage-js'
+import { CurlGenerator } from 'curl-generator'
 import { QueueItem } from '../../types/queue'
 import { Option, program } from 'commander'
 import { SITES_DIR } from '../../constants'
 import { data, loadData } from '../../api'
 import dayjs, { Dayjs } from 'dayjs'
+import merge from 'lodash.merge'
 import path from 'path'
 
 program
@@ -53,6 +57,7 @@ program
       .env('GZIP')
   )
   .addOption(new Option('--curl', 'Display each request as CURL').default(false).env('CURL'))
+  .addOption(new Option('--debug', 'Enable debug mode').default(false).env('DEBUG'))
   .parse()
 
 interface GrabOptions {
@@ -61,6 +66,7 @@ interface GrabOptions {
   output: string
   gzip: boolean
   curl: boolean
+  debug: boolean
   maxConnections: number
   timeout?: number
   delay?: number
@@ -72,25 +78,87 @@ interface GrabOptions {
 const options: GrabOptions = program.opts()
 
 async function main() {
-  if (!options.site && !options.channels)
+  if (typeof options.site !== 'string' && typeof options.channels !== 'string')
     throw new Error('One of the arguments must be presented: `--site` or `--channels`')
 
-  const logger = new Logger()
+  const LOG_LEVELS = { info: 3, debug: 4 }
+  const logger = new Logger({ level: options.debug ? LOG_LEVELS['debug'] : LOG_LEVELS['info'] })
 
   logger.info('starting...')
+  let config: epgGrabber.Types.SiteConfig = defaultConfig
 
-  logger.info('config:')
-  logger.tree(options)
+  if (typeof options.timeout === 'number')
+    config = merge(config, { request: { timeout: options.timeout } })
+  if (options.proxy !== undefined) {
+    const proxy = parseProxy(options.proxy)
+    if (
+      proxy.protocol &&
+      ['socks', 'socks5', 'socks5h', 'socks4', 'socks4a'].includes(String(proxy.protocol))
+    ) {
+      const socksProxyAgent = new SocksProxyAgent(options.proxy)
+      config = merge(config, {
+        request: { httpAgent: socksProxyAgent, httpsAgent: socksProxyAgent }
+      })
+    } else {
+      config = merge(config, { request: { proxy } })
+    }
+  }
+
+  if (typeof options.output === 'string') config.output = options.output
+  if (typeof options.days === 'number') config.days = options.days
+  if (typeof options.delay === 'number') config.delay = options.delay
+  if (typeof options.maxConnections === 'number') config.maxConnections = options.maxConnections
+  if (typeof options.curl === 'boolean') config.curl = options.curl
+  if (typeof options.gzip === 'boolean') config.gzip = options.gzip
+
+  const grabber =
+    process.env.NODE_ENV === 'test' ? new EPGGrabberMock(config) : new EPGGrabber(config)
+
+  const globalConfig = grabber.globalConfig
+
+  logger.debug(`config: ${JSON.stringify(globalConfig, null, 2)}`)
+
+  grabber.client.instance.interceptors.request.use(
+    request => {
+      if (globalConfig.curl) {
+        type AllowedMethods =
+          | 'GET'
+          | 'get'
+          | 'POST'
+          | 'post'
+          | 'PUT'
+          | 'put'
+          | 'PATCH'
+          | 'patch'
+          | 'DELETE'
+          | 'delete'
+
+        const url = request.url || ''
+        const method = request.method ? (request.method as AllowedMethods) : 'GET'
+        const headers = request.headers
+          ? (request.headers.toJSON() as Record<string, string>)
+          : undefined
+        const body = request.data ? (request.data as CurlBody) : undefined
+
+        const curl = CurlGenerator({ url, method, headers, body })
+
+        console.log(curl)
+      }
+
+      return request
+    },
+    error => Promise.reject(error)
+  )
 
   logger.info('loading channels...')
 
   const storage = new Storage()
   let files: string[] = []
-  if (options.site) {
+  if (typeof options.site === 'string') {
     let pattern = path.join(SITES_DIR, options.site, '*.channels.xml')
     pattern = pattern.replace(/\\/g, '/')
 
     files = await storage.list(pattern)
-  } else if (options.channels) {
+  } else if (typeof options.channels === 'string') {
     files = await storage.list(options.channels)
   }
@@ -105,7 +173,7 @@ async function main() {
     channelsFromXML.concat(_channelsFromXML)
   }
 
-  if (options.lang) {
+  if (typeof options.lang === 'string') {
     channelsFromXML = channelsFromXML.filter((channel: Channel) => {
       if (!options.lang) return true
 
@@ -119,7 +187,6 @@ async function main() {
   await loadData()
 
   logger.info('creating queue...')
-  let index = 0
 
   const queue = new Queue()
 
@@ -127,40 +194,13 @@ async function main() {
     channel.index = index++
     if (!channel.site || !channel.site_id || !channel.name) continue
 
-    const configObject = await loadJs(channel.getConfigPath())
-
-    const siteConfig = new SiteConfig(configObject)
-
-    siteConfig.filepath = channel.getConfigPath()
-
-    if (typeof options.timeout === 'number') {
-      siteConfig.request = { ...siteConfig.request, ...{ timeout: options.timeout } }
-    }
-    if (typeof options.days === 'number') siteConfig.days = options.days
-    if (typeof options.delay === 'number') siteConfig.delay = options.delay
-    if (typeof options.curl === 'boolean') siteConfig.curl = options.curl
-    if (typeof options.proxy === 'string') {
-      const proxy = parseProxy(options.proxy)
-
-      if (
-        proxy.protocol &&
-        ['socks', 'socks5', 'socks5h', 'socks4', 'socks4a'].includes(String(proxy.protocol))
-      ) {
-        const socksProxyAgent = new SocksProxyAgent(options.proxy)
-
-        siteConfig.request = {
-          ...siteConfig.request,
-          ...{ httpAgent: socksProxyAgent, httpsAgent: socksProxyAgent }
-        }
-      } else {
-        siteConfig.request = { ...siteConfig.request, ...{ proxy } }
-      }
-    }
+    const config = await loadJs(channel.getConfigPath())
+    const days: number = config.days || globalConfig.days
 
     if (!channel.xmltv_id) channel.xmltv_id = channel.site_id
 
     const currDate = dayjs.utc(process.env.CURR_DATE || new Date().toISOString())
-    const dates = Array.from({ length: siteConfig.days }, (_, day) => currDate.add(day, 'd'))
+    const dates = Array.from({ length: days }, (_, day) => currDate.add(day, 'd'))
 
     dates.forEach((date: Dayjs) => {
       const key = `${channel.site}:${channel.lang}:${channel.xmltv_id}:${date.toJSON()}`
@@ -168,14 +208,12 @@ async function main() {
       queue.add(key, {
        channel,
        date,
-        siteConfig,
+        config,
        error: null
      })
    })
  }
 
-  const grabber = process.env.NODE_ENV === 'test' ? new EPGGrabberMock() : new EPGGrabber()
-
  const taskQueue = new TaskQueue(Promise as PromisyClass, options.maxConnections)
 
  const queueItems = queue.getItems()
@@ -188,10 +226,10 @@ async function main() {
 
   const requests = queueItems.map(
     taskQueue.wrap(async (queueItem: QueueItem) => {
-      const { channel, siteConfig, date } = queueItem
+      const { channel, config, date } = queueItem
 
       if (!channel.logo) {
-        if (siteConfig.logo) {
+        if (config.logo) {
           channel.logo = await grabber.loadLogo(channel, date)
         } else {
           channel.logo = getLogoForChannel(channel)
@@ -203,7 +241,7 @@ async function main() {
       const channelPrograms = await grabber.grab(
         channel,
         date,
-        siteConfig,
+        config,
         (context: epgGrabber.Types.GrabCallbackContext, error: Error | null) => {
           logger.info(
             `  [${i}/${total}] ${context.channel.site} (${context.channel.lang}) - ${
@@ -235,23 +273,18 @@ async function main() {
 
   const pathTemplate = new Template(options.output)
 
-  const channelsGroupedByKey = channels
-    .sortBy([(channel: Channel) => channel.index, (channel: Channel) => channel.xmltv_id])
-    .uniqBy((channel: Channel) => `${channel.xmltv_id}:${channel.site}:${channel.lang}`)
-    .groupBy((channel: Channel) => {
-      return pathTemplate.format({ lang: channel.lang || 'en', site: channel.site || '' })
-    })
+  const channelsGroupedByKey = channels.groupBy((channel: Channel) => {
+    return pathTemplate.format({ lang: channel.lang || 'en', site: channel.site || '' })
+  })
 
-  const programsGroupedByKey = programs
-    .sortBy([(program: Program) => program.channel, (program: Program) => program.start])
-    .groupBy((program: Program) => {
-      const lang =
-        program.titles && program.titles.length && program.titles[0].lang
-          ? program.titles[0].lang
-          : 'en'
+  const programsGroupedByKey = programs.groupBy((program: Program) => {
+    const lang =
+      program.titles && program.titles.length && program.titles[0].lang
+        ? program.titles[0].lang
+        : 'en'
 
-      return pathTemplate.format({ lang, site: program.site || '' })
-    })
+    return pathTemplate.format({ lang, site: program.site || '' })
+  })
 
   for (const groupKey of channelsGroupedByKey.keys()) {
     const groupChannels = new Collection(channelsGroupedByKey.get(groupKey))
diff --git a/scripts/core/index.ts b/scripts/core/index.ts
index 6d69e053..fbe32262 100644
--- a/scripts/core/index.ts
+++ b/scripts/core/index.ts
@@ -1,4 +1,3 @@
 export * from './htmlTable'
-export * from './siteConfig'
 export * from './utils'
 export * from './queue'
diff --git a/scripts/core/siteConfig.ts b/scripts/core/siteConfig.ts
deleted file mode 100644
index a69c3abc..00000000
--- a/scripts/core/siteConfig.ts
+++ /dev/null
@@ -1,71 +0,0 @@
-import * as epgGrabber from 'epg-grabber'
-import merge from 'lodash.merge'
-
-const _default = {
-  days: 1,
-  delay: 0,
-  output: 'guide.xml',
-  request: {
-    method: 'GET',
-    maxContentLength: 5242880,
-    timeout: 30000,
-    withCredentials: true,
-    jar: null,
-    responseType: 'arraybuffer',
-    cache: false,
-    headers: null,
-    data: null
-  },
-  maxConnections: 1,
-  site: undefined,
-  url: undefined,
-  parser: undefined,
-  channels: undefined,
-  lang: 'en',
-  debug: false,
-  gzip: false,
-  curl: false,
-  logo: ''
-}
-
-export class SiteConfig {
-  days: number
-  lang: string
-  delay: number
-  debug: boolean
-  gzip: boolean
-  curl: boolean
-  maxConnections: number
-  output: string
-  request: epgGrabber.Types.SiteConfigRequestConfig
-  site: string
-  channels?: string | string[]
-  url: ((context: epgGrabber.Types.SiteConfigRequestContext) => string | Promise<string>) | string
-  parser: (
-    context: epgGrabber.Types.SiteConfigParserContext
-  ) =>
-    | epgGrabber.Types.SiteConfigParserResult[]
-    | Promise<epgGrabber.Types.SiteConfigParserResult[]>
-  logo: ((context: epgGrabber.Types.SiteConfigRequestContext) => string | Promise<string>) | string
-  filepath: string
-
-  constructor(config: epgGrabber.Types.SiteConfigObject) {
-    this.site = config.site
-    this.channels = config.channels
-    this.url = config.url
-    this.parser = config.parser
-    this.filepath = config.filepath
-
-    this.days = config.days || _default.days
-    this.lang = config.lang || _default.lang
-    this.delay = config.delay || _default.delay
-    this.debug = config.debug || _default.debug
-    this.maxConnections = config.maxConnections || _default.maxConnections
-    this.gzip = config.gzip || _default.gzip
-    this.curl = config.curl || _default.curl
-    this.output = config.output || _default.output
-    this.logo = config.logo || _default.logo
-
-    this.request = merge(_default.request, config.request)
-  }
-}
diff --git a/scripts/default.config.js b/scripts/default.config.js
new file mode 100644
index 00000000..98b39982
--- /dev/null
+++ b/scripts/default.config.js
@@ -0,0 +1,10 @@
+export default {
+  days: 1,
+  delay: 0,
+  request: {
+    maxContentLength: 5242880,
+    timeout: 30000,
+    withCredentials: true,
+    jar: null
+  }
+}
diff --git a/scripts/types/queue.d.ts b/scripts/types/queue.d.ts
index 26615c15..ef0fead8 100644
--- a/scripts/types/queue.d.ts
+++ b/scripts/types/queue.d.ts
@@ -1,10 +1,10 @@
-import { SiteConfig } from '../core/siteConfig'
 import { Channel } from '../models/channel'
+import epgGrabber from 'epg-grabber'
 import { Dayjs } from 'dayjs'
 
 export interface QueueItem {
   channel: Channel
   date: Dayjs
-  siteConfig: SiteConfig
+  config: epgGrabber.Types.SiteConfig
   error: string | null
 }