YouTube-RSS/youtube_rss.py

#!/usr/bin/env python3

"""
YouTube-RSS is a simple script to build RSS feeds from YouTube links.
You can pass links to channels, playlists or even user pages.
The script is modular, so any new entity could be added easily.
"""


import argparse
import sys

from typing import Optional

from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
from urllib.parse import parse_qs, urlparse, urlencode


# YouTube serves every RSS feed from this single endpoint.
# The GET parameters appended to it select the feed that is returned.
RSS_FEEDS_BASE_URL = r'https://www.youtube.com/feeds/videos.xml'


def get_feed(entity: str, token: str) -> str:
    """
    Build the RSS feed URL for a given YouTube entity.

    `entity` is used as the GET parameter name and `token` as its value;
    the pair is URL-encoded and appended to the base feeds URL.
    """
    # Encode defensively: the token usually comes straight out of another URL,
    # but nothing guarantees it is already safe to embed in a query string.
    query_string = urlencode({entity: token})
    return RSS_FEEDS_BASE_URL + '?' + query_string


def check_link(link: str) -> Optional[str]:
    """
    Verify that passed link is valid upstream, with a HEAD request.

    This function returns `None` when the link is valid, or a short string
    describing the encountered error otherwise:

    - the HTTP status code when the server answers with an HTTP error;
    - the failure reason when the request could not be completed at all
      (unknown host, refused connection, unsupported scheme...);
    - "<code> <message>" when the answer is anything but "200 OK";
    - "Unknown RSS feeds server" when the `Server` header is unexpected.
    """
    try:
        # The context manager closes the underlying connection as soon as the
        # status line and headers have been read.
        with urlopen(Request(link, method='HEAD')) as response:
            status = response.getcode()
            message = response.msg
            server = response.getheader('Server')
    except HTTPError as error:
        return str(error.code)
    except URLError as error:
        # `HTTPError` is a subclass of `URLError`, so this clause must come second.
        # Callers treat a falsy value as success: never return an empty string.
        return str(error.reason) or 'URL error'

    if status != 200 or message != 'OK':
        return '{0} {1}'.format(status, message)

    if server != "YouTube RSS Feeds server":
        return "Unknown RSS feeds server"

    return None


def _is_youtube_host(hostname: Optional[str]) -> bool:
    """Tell whether `hostname` is a YouTube domain or one of its sub-domains."""
    if not hostname:
        return False
    # A bare suffix test would also accept look-alikes such as "notyoutube.com",
    # so require an exact match or a match on a dot-separated boundary.
    return any(
        hostname == domain or hostname.endswith('.' + domain)
        for domain in ('youtube.com', 'youtu.be')
    )


def process_link(i: int, link: str, no_check: bool = False) -> None:
    """
    Main logic, parse and analyze the link and compute RSS feed.

    `i` is only used to prefix the printed messages.
    The resulting feed URL is printed on STDOUT, any problem on STDERR.
    When `no_check` is set, the feed is printed without upstream validation.
    """

    # Parse the passed link. `urlparse` raises `ValueError` on some malformed
    # inputs (e.g. unbalanced IPv6 brackets): such links are simply skipped
    # instead of aborting the processing of the remaining ones.
    try:
        parsing = urlparse(link)
        # `hostname` is lower-cased and stripped from any port or credentials.
        is_youtube = _is_youtube_host(parsing.hostname)
    except ValueError:
        is_youtube = False

    # Check that the provided link belongs to YouTube.
    if not is_youtube:
        print(
            "[{0}] The provided link is not a YouTube link. Skipping.".format(i),
            file=sys.stderr
        )
        return

    # Parse the GET parameters (if any).
    get_params = parse_qs(parsing.query)

    # Starts URL pattern matching.
    # The prefixes tested below guarantee at least three path segments.
    segments = parsing.path.split('/')
    if parsing.path.startswith(('/c/', '/user/')):
        entity, token = 'user', segments[2]
    elif parsing.path.startswith('/channel/'):
        entity, token = 'channel_id', segments[2]
    elif 'list' in get_params:
        entity, token = 'playlist_id', get_params['list'][0]
    else:
        entity, token = '', ''

    # An empty token (e.g. ".../channel/") cannot identify any feed.
    if not token:
        print(
            "[{0}] The provided link has an unknown format. Skipping.".format(i),
            file=sys.stderr
        )
        return

    rss_feed = get_feed(entity, token)

    error_reason = (check_link(rss_feed) if not no_check else None)
    if not error_reason:
        print("[{0}] <{1}>".format(i, rss_feed))
    else:
        print(
            "[{0}] No responding RSS feed could be retrieved ({1}).".format(
                i, error_reason
            ),
            file=sys.stderr
        )


def main():
    """Command-line entry point: parse options, gather links, convert each one."""
    cli = argparse.ArgumentParser(
        prog="YouTube-RSS",
        description="A simple RSS feeds generation for YouTube entities",
    )
    cli.add_argument(
        'links',
        nargs='*',
        type=str,
        help="YouTube link(s) to convert to RSS feed(s)",
    )
    cli.add_argument(
        '--read-stdin',
        action='store_true',
        help="Use this option to read input links from STDIN directly",
    )
    cli.add_argument(
        '--no-check',
        action='store_true',
        help="Don't check the resulting RSS feeds validity upstream",
    )
    cli.add_argument('--version', action='version', version="%(prog)s : 1.4.0")

    options = cli.parse_args()

    # Links piped on STDIN (whitespace-separated) replace the positional ones.
    if options.read_stdin:
        links = sys.stdin.read().split()
    else:
        links = options.links

    # Nothing to convert: display the usage and leave with an error status.
    if not links:
        cli.print_help()
        sys.exit(2)

    # Convert every link, numbering them from zero in the printed messages.
    for index, link in enumerate(links):
        process_link(index, link, options.no_check)


# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()