""" buildrss.py
Copyright 2008, Scott Ludwig
http://www.scottlu.com

buildrss.py is a simple script to build an rss feed out of a directory of
images. This was built with Leopard's RSS picture screensaver in mind, to
show images from a home server.

Leopard's RSS picture screensaver has a number of issues:

1. It takes forever to "validate" an RSS document, for an unknown reason.
   My photo archive has > 60K pictures, which causes the screen saver
   to hang. Smaller RSS documents work fine (<1000).
2. It re-reads the RSS feed every 30 minutes, and then proceeds to download
   all the images in the feed right away rather than reading them as they
   are displayed. Another reason why many RSS items won't work.
3. Once an item with a given url is read, the screen saver won't re-read it
   even if it changes. 
4. It doesn't auto-rotate images based on EXIF orientation settings.
5. Using enclosures causes a message to be logged to the console utility.

This RSS generator addresses these issues. It reads the picture archive, then
randomly selects N images from it, normalizes them (rotate and scale), and
saves them into a normalized image cache, with each name based on the md5sum
of the resulting file (to counteract Leopard's caching). This script is
meant to be called periodically from a cron job, for example every 24 hours,
to replace the cached images with a fresh selection of, say, 200 pictures.
Here are example cron settings:

0 0 * * * /usr/bin/python buildrss.py /mnt/pics /var/www/pics http://homeserver/pics 200 1440 900

Then to have cron pick up these settings for a new cron job, save them to
foo.cron for example, then run:

crontab foo.cron

This cron command runs the script at midnight every night, selects pictures
from /mnt/pics, saves normalized pictures to /var/www/pics, uses
http://homeserver/pics as the root url for each RSS item, randomly selects 200
pictures from the picture archive, and scales to 1440 x 900 (scaling preserves
aspect ratio). Assuming you have apache running, the RSS feed is then
available as:

http://homeserver/pics/pics.rss

Note in the previous example, saving to /var/www/pics may require the cron
job run in the right user context, such as from root.

Regarding scaling: technically the screen saver does scale, but pre-scaling
makes the images much smaller, which is nice because every item in the RSS
feed is copied to your Mac.

Usage:

python buildrss.py <image archive path> <dest image path> <base url> <count> <width> <height>

<image archive path>    The root of the tree containing the source images
<dest image path>       The directory to store the RSS feed and data
<base url>              The base url that accesses this directory over HTTP
<count>                 The number of images to process
<width> <height>        Scales the image to fit within these dimensions
                        (in pixels), preserving the aspect ratio

The RSS feed is then available as <base url>/pics.rss. Note a file called 'map'
is also stored in <dest image path>, which maps images from the cache back
to images in the archive. This is for reference purposes only.

License:

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

import glob
import hashlib
import md5
import os
import random
import shutil
import stat
import sys
import time
import urllib
from os.path import join

import Image

def walk(treepath):
    """Return the absolute paths of all JPEG files found under treepath.

    The tree rooted at treepath is scanned recursively. A file is selected
    when its extension is .jpg or .jpeg, compared case-insensitively.
    Directories, broken symbolic links and other non-regular files are
    skipped, so a single unreadable entry cannot abort the whole scan.

    treepath -- root directory of the picture archive (relative or absolute)

    Returns a list of absolute file paths in directory traversal order.
    """
    extensions = ('.jpg', '.jpeg')
    found = []
    # os.walk (unlike os.path.walk) exists in both Python 2 and Python 3 and
    # already separates file names from directory names.
    for dirpath, dirnames, filenames in os.walk(os.path.abspath(treepath)):
        for name in filenames:
            if os.path.splitext(name)[1].lower() not in extensions:
                continue
            filepath = join(dirpath, name)
            # isfile() is False for broken symlinks and special files
            if os.path.isfile(filepath):
                found.append(filepath)
    return found

# EXIF orientation code -> names of the Image transpose operations that,
# applied in order, turn the picture upright. Code 1 (already upright) and
# unknown codes are absent on purpose. PIL's ROTATE_* constants rotate
# counter-clockwise.
_ORIENTATION_TRANSPOSES = {
    2: ('FLIP_LEFT_RIGHT',),                # flip horz
    3: ('ROTATE_180',),                     # same as flip vert + flip horz
    4: ('FLIP_TOP_BOTTOM',),                # flip vert
    5: ('ROTATE_270', 'FLIP_LEFT_RIGHT'),   # rot 270 ccw, flip horz
    6: ('ROTATE_270',),                     # rot 270 ccw
    7: ('ROTATE_270', 'FLIP_TOP_BOTTOM'),   # rot 270 ccw, flip vert
    8: ('ROTATE_90',),                      # rot 90 ccw
}

def rotate_fix(im):
    """Return im turned upright according to its EXIF orientation tag.

    im -- an opened PIL image

    The orientation is read from EXIF tag 274 ('Orientation'), see
    http://mail.python.org/pipermail/image-sig/2005-July/003427.html

    Returns a transposed copy when the tag asks for one. The image itself
    is returned unchanged when it has no readable EXIF data, no orientation
    tag, orientation 1 (normal) or an unknown orientation code.
    """
    try:
        code = im._getexif()[274]
    except Exception:
        # No _getexif method (not a JPEG), no EXIF block, no orientation
        # tag or unparsable EXIF data: leave the picture alone.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        return im

    for name in _ORIENTATION_TRANSPOSES.get(code, ()):
        im = im.transpose(getattr(Image, name))
    return im

def size_fix(im, width, height):
    """Return im scaled down to fit within width x height.

    The aspect ratio is maintained and pictures are never enlarged.

    im     -- an opened PIL image
    width  -- maximum width in pixels
    height -- maximum height in pixels

    Returns im itself when it already fits, otherwise a resized copy.
    """
    new_width, new_height = im.size
    # Floor division keeps the dimensions integral under true division too;
    # max(1, ...) stops a very elongated picture from collapsing to a zero
    # dimension, which resize() cannot handle.
    if new_width > width:
        new_height = max(1, new_height * width // new_width)
        new_width = width
    if new_height > height:
        new_width = max(1, new_width * height // new_height)
        new_height = height

    if (new_width, new_height) == tuple(im.size):
        return im
    # Filter code 1 is PIL's ANTIALIAS (Lanczos) resampling filter
    return im.resize((new_width, new_height), resample=1)

def get_filehash(filepath_abs):
    """Return the MD5 digest of a file's contents as a hex string.

    filepath_abs -- path of the file to hash

    The file is read in binary mode, one megabyte at a time, so large
    pictures are never held in memory as a whole. The file is closed even
    when reading fails.
    """
    digest = hashlib.md5()
    f = open(filepath_abs, 'rb')
    try:
        while True:
            chunk = f.read(1024 * 1024)
            if not chunk:
                break
            digest.update(chunk)
    finally:
        f.close()
    return digest.hexdigest()

def _clear_cache(cachepath):
    """Delete every non-hidden entry directly inside cachepath.

    Entries that cannot be removed (for example subdirectories) are
    reported and left in place.
    """
    try:
        names = os.listdir(cachepath)
    except OSError:
        # Missing or unreadable cache directory: nothing to clear
        return
    for name in names:
        if name.startswith('.'):
            # Same selection as a '*' glob pattern: dotfiles are kept
            continue
        oldpath = join(cachepath, name)
        try:
            os.unlink(oldpath)
        except OSError:
            print('Cannot remove %s' % oldpath)


def _normalize_pictures(piclist, cachepath, count, width, height):
    """Rotate, scale and store up to count pictures from piclist.

    Each normalized picture is stored in cachepath under the md5 hash of
    its contents. Pictures that cannot be read or written are skipped.

    Returns (hashlist, maplist): the cache file names and the source paths
    they were made from, aligned by index.
    """
    hashlist = []
    maplist = []
    temppath = join(cachepath, 'temp.jpg')
    for picfile in piclist:
        # Checked first so that a count of zero really yields no pictures
        if len(maplist) >= count:
            break
        print('%d: %s' % (len(maplist), picfile))
        try:
            im = Image.open(picfile)
            im = rotate_fix(im)
            im = size_fix(im, width, height)
            # Saving belongs in the try block too: PIL reads pixel data
            # lazily, so a damaged file may only fail at this point.
            im.save(temppath)
        except Exception:
            print('Skipping %s' % picfile)
            try:
                os.unlink(temppath)
            except OSError:
                pass    # no partial file was left behind
            continue

        hashname = get_filehash(temppath) + '.jpg'
        os.rename(temppath, join(cachepath, hashname))
        hashlist.append(hashname)
        maplist.append(picfile)
    return hashlist, maplist


def _write_rss(cachepath, url, hashlist):
    """Write pics.rss into cachepath with one item per cached picture."""
    if not url.endswith('/'):
        url = url + '/'

    rss = open(join(cachepath, 'pics.rss'), 'w')
    try:
        rss.write('<?xml version="1.0" encoding="iso-8859-1"?>\n')
        rss.write('<rss version="2.0">\n')
        rss.write('<channel>\n')
        rss.write('\n')
        for hashfile in hashlist:
            rss.write('<item>'
                      '<description><![CDATA[<img src="%s" />]]></description>'
                      '</item>\n\n' % (url + hashfile))
        rss.write('</channel>\n')
        rss.write('</rss>\n')
    finally:
        rss.close()


def _write_map(cachepath, hashlist, maplist):
    """Write the 'map' file relating cached names to archive paths.

    The file is for reference only; nothing reads it back.
    """
    mapfile = open(join(cachepath, 'map'), 'w')
    try:
        for hashfile, picfile in zip(hashlist, maplist):
            mapfile.write('%s: %s\n' % (hashfile, picfile))
    finally:
        mapfile.close()


def main(argv):
    """Rebuild the normalized picture cache and its RSS feed.

    argv -- the command line: program name, image archive path, dest image
            path, base url, count, width, height

    Prints the module documentation and exits with status 1 when the
    number of arguments is wrong or count/width/height are not integers.
    """
    if len(argv) != 7:
        print(__doc__)
        sys.exit(1)

    picpath, cachepath, url = argv[1:4]
    try:
        count, width, height = [int(arg) for arg in argv[4:7]]
    except ValueError:
        print(__doc__)
        sys.exit(1)

    # Get a list of the available pictures
    print('building file list...')
    pictures = walk(picpath)

    # Randomize the list
    print('list length: %d' % len(pictures))
    piclist = random.Random().sample(pictures, len(pictures))

    # Clear the normalized image cache.
    print('Clearing image cache')
    _clear_cache(cachepath)

    # Normalize the images, copy to the cache
    print('Normalizing %d pictures from list...' % count)
    hashlist, maplist = _normalize_pictures(piclist, cachepath, count,
                                            width, height)

    # Write out rss feed
    _write_rss(cachepath, url, hashlist)

    # Write out picture map (just for reference)
    _write_map(cachepath, hashlist, maplist)


if __name__ == '__main__':
    main(sys.argv)
