Wrangling JSON

Some important concepts:
– using the codecs module to write Unicode files
– using authentication with web APIs
– using an offset when accessing web APIs

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import json
import codecs
import requests

# Base address shared by every service URL built in this module.
URL_MAIN = "http://api.nytimes.com/svc/"
# Root of the "mostpopular" service, derived from the base address.
URL_POPULAR = "{0}mostpopular/v2/".format(URL_MAIN)
# API keys looked up by service name; both are blank placeholders here.
API_KEY = dict(popular="", article="")

def get_from_file(kind, period):
	"""Load previously saved data from ``popular-<kind>-<period>.json``.

	Args:
		kind: Value substituted into the first slot of the file name.
		period: Value substituted into the second slot of the file name.

	Returns:
		The object decoded from the file's JSON content.

	Raises:
		IOError/OSError: if the file cannot be opened.
		ValueError: if the content is not valid JSON.
	"""
	filename = "popular-{0}-{1}.json".format(kind, period)
	# Decode explicitly as UTF-8 (the encoding save_file writes with) so the
	# result does not depend on the platform's default locale encoding.
	with codecs.open(filename, encoding="utf-8", mode="r") as f:
		return json.load(f)

def article_overview(kind, period):
	"""Summarise the saved articles for ``kind``/``period``.

	Reads the saved data through ``get_from_file`` and walks it once.

	Returns:
		A ``(titles, urls)`` tuple: ``titles`` holds one single-entry
		``{section: title}`` dict per article, and ``urls`` holds the ``url``
		of every media-metadata entry whose ``format`` is
		``"Standard Thumbnail"``.
	"""
	headlines = []
	thumbnails = []

	for entry in get_from_file(kind, period):
		section_name = entry["section"]
		headline = entry["title"]
		headlines.append({section_name: headline})

		# Articles without a "media" key contribute no thumbnail URLs.
		if "media" not in entry:
			continue
		thumbnails.extend(
			meta["url"]
			for media_item in entry["media"]
			for meta in media_item["media-metadata"]
			if meta["format"] == "Standard Thumbnail"
		)

	return (headlines, thumbnails)

def query_site(url, target, offset):
	"""Send a GET request to ``url`` and return the decoded JSON response.

	Args:
		url: Full URL of the endpoint to query.
		target: Key into ``API_KEY`` selecting which API key is sent.
		offset: Value sent as the ``offset`` query parameter.

	Returns:
		The parsed JSON body when the response status is OK. ``False`` when
		either entry of ``API_KEY`` is still blank. ``None`` when the status
		is not OK but ``raise_for_status()`` does not raise.

	Raises:
		Whatever ``raise_for_status()`` raises for an error status
		(``requests.HTTPError`` per the requests documentation).
	"""
	# Both keys must be filled in, regardless of which one this call uses.
	if API_KEY["popular"] == "" or API_KEY["article"] == "":
		# Parenthesised single-argument print behaves the same on Python 2
		# and Python 3; the bare print statement is a SyntaxError on 3.
		print("You need to register for NYTimes Developer account to run this program.")
		print("See Instructor notes for information")
		return False

	params = {"api-key": API_KEY[target], "offset": offset}
	r = requests.get(url, params=params)

	if r.status_code == requests.codes.ok:
		return r.json()

	r.raise_for_status()
	# Reached only for a non-OK status that is not an HTTP error.
	return None

def get_popular(url, kind, days, section="all-sections", offset=0):
	"""Query one "most popular" endpoint and return its decoded response.

	Args:
		url: Base URL of the most-popular service; the endpoint path is
			appended to it.
		kind: One of ``"viewed"``, ``"shared"`` or ``"emailed"``. The legacy
			spelling ``"viewd"`` is still accepted and treated as
			``"viewed"``.
		days: Time period in days; must be 1, 7 or 30.
		section: Section segment of the endpoint path.
		offset: Offset forwarded to ``query_site``.

	Returns:
		Whatever ``query_site`` returns, or ``False`` (after printing a
		message) when ``days`` or ``kind`` is not an accepted value.
	"""
	if days not in (1, 7, 30):
		print("time period can be 1, 7, 30 days only")
		return False

	# The service path is "mostviewed"; building "mostviewd" from the legacy
	# spelling would request a path that does not exist. Normalise it while
	# still accepting the old spelling so existing callers keep working.
	endpoint_kind = "viewed" if kind == "viewd" else kind
	if endpoint_kind not in ("viewed", "shared", "emailed"):
		print("kind can be only one of viewed/shared/emailed")
		return False

	url += "most{0}/{1}/{2}.json".format(endpoint_kind, section, days)
	return query_site(url, "popular", offset)

def save_file(kind, period):
	"""Download every result for ``kind``/``period`` and save it as JSON.

	The results are written, UTF-8 encoded and indented, to
	``popular-<kind>-<period>.json``.

	Args:
		kind: Popularity kind forwarded to ``get_popular``.
		period: Time period in days forwarded to ``get_popular``.

	Returns:
		``True`` once the file has been written; ``False`` if any request
		returned no data, in which case no file is created or overwritten.
	"""
	# Stride between offsets, as in the original loop; presumably the
	# service's page size -- confirm against the API documentation.
	page_size = 20

	# Ask for the kind/period that was actually requested (this used to be
	# hard-coded to "viewd"/1, so num_results could describe another list).
	first_page = get_popular(URL_POPULAR, kind, period)
	if not first_page:
		# get_popular signals failure with a falsy value; indexing it would
		# raise TypeError.
		return False

	num_results = first_page["num_results"]
	# The first response already holds the offset-0 results, so reuse them
	# instead of requesting the same page twice.
	full_data = list(first_page["results"])

	for offset in range(page_size, num_results, page_size):
		page = get_popular(URL_POPULAR, kind, period, offset=offset)
		if not page:
			return False
		full_data += page["results"]

	# Open the file only after all data is in hand so a failed download
	# cannot leave a truncated or empty file behind.
	filename = "popular-{0}-{1}.json".format(kind, period)
	with codecs.open(filename, encoding='utf-8', mode='w') as v:
		v.write(json.dumps(full_data, indent=2))
	return True

def test():
	"""Check ``article_overview("viewd", 1)`` against fixed expected values.

	Relies on the saved data file for that kind and period being present.
	"""
	overview = article_overview("viewd", 1)
	headlines, thumbnails = overview

	# Expected sizes of the two returned lists.
	assert len(headlines) == 20
	assert len(thumbnails) == 30

	# Spot checks on individual entries.
	# NOTE(review): the expected title below looks misspelled ("Wee need
	# you!"); confirm it against the data file before relying on this check.
	assert headlines[2] == {'Opinion': 'Professors, Wee need you!'}
	assert thumbnails[20] == 'http://graphics8.nytimes.com/images/2014/02/17/sports/ICEDANCE/ICEDANCE-thumbStandard.jpg'

# Run the self-check when the file is executed as a script.
if __name__ == "__main__":
	# The bare name `test` only referenced the function without calling it,
	# so the checks never ran.
	test()

Python XML
https://wiki.python.org/moin/PythonXml