Source code for AutomateTheBoringStuff.Ch15.P7_multidownloadXkcd

#! python3
"""Multidownload XKCD

Downloads 1400 `XKCD`_ comics much faster by using :py:mod:`threading`.

Note:
    Default output directory is ``./xkcd``.

.. _XKCD:
    https://xkcd.com/

"""


[docs]def downloadXkcd(startComic: int, endComic: int) -> None:
    """Download XKCD

    Uses :py:mod:`requests` and :py:mod:`bs4` to download all comics in a given range.

    Args:
        startComic: Comic ID number to start from.
        endComic: Comic ID number to end at.

    Returns:
        None. Prints status updates and downloads comics to download directory.

    """
    for urlNumber in range(startComic, endComic):
        # Download the page.
        print('Downloading page http://xkcd.com/%s...' % urlNumber)
        res = requests.get('http://xkcd.com/%s' % urlNumber)
        res.raise_for_status()

        soup = bs4.BeautifulSoup(res.text)

        # Find the URL of the comic image.
        comicElem = soup.select('#comic img')
        if not comicElem:
            print('Could not find comic image.')
        else:
            comicUrl = 'http:' + comicElem[0].get('src')
            # Download the image.
            print('Downloading image %s...' % comicUrl)
            res = requests.get(comicUrl)
            res.raise_for_status()

            # Save the image to ./xkcd.
            imageFile = open(os.path.join('xkcd', os.path.basename(comicUrl)), 'wb')
            for chunk in res.iter_content(100000):
                imageFile.write(chunk)
            imageFile.close()


[docs]def main():
    import requests, os, bs4, threading
    os.makedirs('xkcd', exist_ok=True)  # store comics in ./xkcd

    # Create and start the Thread objects.
    downloadThreads = []  # a list of all the Thread objects
    for i in range(0, 1400, 100):  # loops 14 times, creates 14 threads
        downloadThread = threading.Thread(target=downloadXkcd, args=(i, i + 99))
        downloadThreads.append(downloadThread)
        downloadThread.start()

    # Wait for all threads to end.
    for downloadThread in downloadThreads:
        downloadThread.join()
    print('Done.')


if __name__ == '__main__':
    main()