# Source code for photomosaic.flickr

import warnings
import os
import re
import urllib
import requests
import itertools
import json
from tqdm import tqdm
from .photomosaic import options


# Base of the public, per-user photo pages. Elsewhere in this module a user
# identifier is appended to it, and album URLs are matched against it.
PUBLIC_URL = "https://www.flickr.com/photos/"
# Endpoint that every Flickr API call made by this module is sent to.
API_URL = 'https://api.flickr.com/services/rest/'
# Templates for the location (PATH) and file name (NAME) of a photo's image
# file. They are filled in with the ``farm``, ``server``, ``id`` and
# ``secret`` fields of a photo record returned by the API.
# NOTE(review): PATH uses plain http while the other URLs use https --
# confirm whether that is intentional.
PATH = "http://farm{farm}.staticflickr.com/{server}/"
# NOTE(review): the ``_b`` suffix presumably selects one of Flickr's image
# sizes -- confirm against Flickr's size-suffix documentation.
NAME = "{id}_{secret}_b.jpg"


def _flickr_request(**kwargs):
    """
    Send one request to the Flickr REST API and return the decoded JSON.

    The API key from ``options['flickr_api_key']`` is added automatically,
    and the response is requested as plain JSON (no JSONP callback wrapper).

    Parameters
    ----------
    **kwargs
        Flickr API arguments (e.g. ``method``, ``page``), passed through as
        query parameters.

    Returns
    -------
    dict
        The decoded JSON body of the response.
    """
    # Without a timeout, requests waits forever on a stalled connection,
    # which would hang a long-running download loop. Seconds; applies to
    # both connecting and waiting for data.
    timeout = 30
    query = dict(kwargs)
    query.update(api_key=options['flickr_api_key'],
                 format='json',
                 nojsoncallback=1)
    response = requests.get(API_URL, params=query, timeout=timeout)
    return response.json()


def from_search(text, dest, cutoff=4000, license=None):
    """
    Download photos matching a search query and the specified license(s).

    Each image is saved in ``dest``; next to it, a JSON file with the same
    base name records the attribution metadata for that photo.

    Parameters
    ----------
    text : string
        Search query
    dest : string
        Output directory. It is created if it does not exist.
    cutoff : integer or None, optional
        Max number of images to download. By default, 4000 (Flickr's max).
        If None, downloading continues until the results run out.
    license : list or None
        List of license codes documented by Flickr at
        https://www.flickr.com/services/api/flickr.photos.licenses.getInfo.html
        If None, photomosaic defaults to ``[1, 2, 4, 5, 7, 8]``. See link for
        details.

    Raises
    ------
    RuntimeError
        If the first page of search results cannot be retrieved.
    """
    dest = os.path.expanduser(dest)
    if license is None:
        license = [1, 2, 4, 5, 7, 8]
    os.makedirs(dest, exist_ok=True)
    raw_licenses = _flickr_request(method='flickr.photos.licenses.getInfo')
    # Index the license descriptions by id for lookup while writing metadata.
    licenses = {item.pop('id'): item
                for item in raw_licenses['licenses']['license']}
    # Number of photos processed so far (failed downloads are counted too).
    attempted = 0
    for page in itertools.count(1):
        if (cutoff is not None) and (attempted >= cutoff):
            # Limit reached exactly at a page boundary; skip the next query.
            return
        response = _flickr_request(
                method='flickr.photos.search',
                license=','.join(map(str, license)),
                extras='owner_name,license',
                per_page=500,  # the max allowed value, to conserve our queries
                text=text,
                content_type=1,  # photos only
                page=page
        )
        if response.get('stat') != 'ok':
            # If we fail requesting page 1, that's an error. If we fail
            # requesting page > 1, we're just out of photos.
            if page == 1:
                raise RuntimeError("response: {}".format(response))
            break
        photos = response['photos']['photo']
        if not photos:
            # A successful but empty page also means we are out of photos;
            # without this check the loop would keep requesting pages.
            break
        pbar = tqdm(photos, desc='downloading page {}'.format(page))
        for photo in pbar:
            # ``>=`` so that exactly ``cutoff`` photos are processed.
            if (cutoff is not None) and (attempted >= cutoff):
                pbar.close()
                return
            attempted += 1
            # Download and save image.
            url = (PATH + NAME).format(**photo)
            filename = (NAME).format(**photo)
            filepath = os.path.join(dest, filename)
            _try_retrieve_warn_failure(url, filepath)
            # Save metadata for attribution.
            metapath = os.path.splitext(filepath)[0] + '.json'
            # Collect attribution info as specified by Creative Commons
            # best practices:
            # https://wiki.creativecommons.org/wiki/best_practices_for_attribution#Title.2C_Author.2C_Source.2C_License
            license_id = photo['license']
            attribution = {'title': photo['title'],
                           'owner': photo['owner'],
                           'owner_name': photo['ownername'],
                           # The display name is not a valid URL component
                           # (it may contain spaces); the user id is.
                           'owner_url': PUBLIC_URL + photo['owner'],
                           'license_url': licenses[license_id]['url'],
                           'license_name': licenses[license_id]['name'],
                           'license': license_id}
            with open(metapath, 'w') as metafile:
                json.dump(attribution, metafile)
        # Stop once the API reports that this was the last page.
        total_pages = response['photos'].get('pages')
        if total_pages is not None and page >= int(total_pages):
            break


def _get_photoset(photoset_id, nsid, dest):
    """
    Download every photo of one album ("photoset") into ``dest``.

    Each image is saved in ``dest``; next to it, a JSON file with the same
    base name holds the photo record exactly as returned by the API.

    Parameters
    ----------
    photoset_id : string
        Identifier of the photoset.
    nsid : string
        Identifier of the photoset's owner, sent along with the request.
    dest : string
        Output directory. It is created if it does not exist.

    Raises
    ------
    RuntimeError
        If the first page of the photoset cannot be retrieved.
    """
    dest = os.path.expanduser(dest)
    os.makedirs(dest, exist_ok=True)
    for page in itertools.count(1):
        # NOTE(review): Flickr's documentation names the owner argument of
        # flickr.photosets.getPhotos ``user_id`` -- confirm that ``nsid`` is
        # honored before relying on it.
        response = _flickr_request(
                method='flickr.photosets.getPhotos',
                photoset_id=photoset_id,
                nsid=nsid,
                content_type=1,  # photos only
                page=page
        )
        if response.get('stat') != 'ok':
            # If we fail requesting page 1, that's an error. If we fail
            # requesting page > 1, we're just out of photos.
            if page == 1:
                raise RuntimeError("response: {}".format(response))
            break
        photoset = response['photoset']
        photos = photoset['photo']
        if not photos:
            # A successful but empty page also means we are out of photos;
            # without this check the loop would keep requesting pages.
            break
        for photo in tqdm(photos, desc='downloading page {}'.format(page)):
            url = (PATH + NAME).format(**photo)
            filename = (NAME).format(**photo)
            filepath = os.path.join(dest, filename)
            _try_retrieve_warn_failure(url, filepath)
            # Save metadata for attribution.
            metapath = os.path.splitext(filepath)[0] + '.json'
            with open(metapath, 'w') as metafile:
                json.dump(photo, metafile)
        # Stop once the API reports that this was the last page.
        total_pages = photoset.get('pages')
        if total_pages is not None and page >= int(total_pages):
            break


def _try_retrieve_warn_failure(url, filepath):
    """
    Download ``url`` to ``filepath``, trying up to three times.

    If every attempt fails with a ``URLError``, a warning listing the
    collected errors is issued and the function returns normally, so one
    bad photo does not abort a whole batch.

    Parameters
    ----------
    url : string
        Location of the file to download.
    filepath : string
        Where to save the downloaded file.
    """
    # ``import urllib`` alone does not load these submodules; import them
    # explicitly rather than relying on another package having done so.
    import urllib.error
    import urllib.request

    errors = []
    for _ in range(3):
        try:
            urllib.request.urlretrieve(url, filepath)
        except urllib.error.URLError as error:
            errors.append(error)
            continue  # try again
        else:
            break
    else:
        # tried 3 times, failed every time
        warnings.warn("Skipping {}: {}".format(url, errors))


def from_url(url, dest):
    """
    Download an album ("photoset") from its url.

    There is no programmatic license-checking here; that is up to the user.

    Parameters
    ----------
    url : string
        e.g., https://www.flickr.com/photos/<username>/sets/<photoset_id>
        The ``/albums/<photoset_id>`` form of the URL is accepted as well.
    dest : string
        Output directory

    Raises
    ------
    ValueError
        If ``url`` does not look like an album URL.
    RuntimeError
        If the owner of the album cannot be looked up.
    """
    dest = os.path.expanduser(dest)
    # Escape the base URL so its dots match literally, and keep the user
    # part from swallowing further path segments.
    pattern = re.escape(PUBLIC_URL) + r"([^/]+)/(?:sets|albums)/([0-9]+)"
    m = re.match(pattern, url)
    if m is None:
        raise ValueError("""Expected URL like:
https://www.flickr.com/photos/<username>/sets/<photoset_id>""")
    username, photoset_id = m.groups()
    response = _flickr_request(method="flickr.urls.lookupUser",
                               url=PUBLIC_URL + username)
    if response.get('stat') != 'ok':
        # Same error style as the other API calls in this module.
        raise RuntimeError("response: {}".format(response))
    # The NSID is the user's ``id``; ``username`` holds the display name.
    nsid = response['user']['id']
    return _get_photoset(photoset_id, nsid, dest)