Houston We Have a Problem

Apollo 13
TLI, MCC-2, DPS-1

what is important?
mass of the spacecraft, size of the moon, motion of the moon, size of the earth, motion of the earth, 3D

sine & friends
b/c = sinβ
a/c = cosβ

60° ≈ 1 rad, 0.1 rad ≈ 6°, sin 40° ≈ 0.64
sin(40°) = 0.6427876096865393

Motion of the moon around the earth
t = time
orbit radius $r \approx 4 \cdot 10^{8}\,\mathrm{m}$, angle $\theta(t) = 2\pi t / (27\ \mathrm{days})$

Using match

def get_db(db_name):
	"""Return a handle to the database named `db_name`.

	Connects to the MongoDB server at localhost:27017.
	"""
	# pymongo is imported only when a connection is actually requested.
	from pymongo import MongoClient
	return MongoClient('localhost:27017')[db_name]

def make_pipeline():
	"""Return the list of aggregation stages to run.

	The list is currently empty; a fresh list is returned on every call.
	"""
	return []

def aggregate(db, pipeline):
	"""Run `pipeline` on `db.tweets` and return all resulting documents as a list."""
	cursor = db.tweets.aggregate(pipeline)
	return list(cursor)

# Script entry point: run the pipeline against the 'twitter' database and
# check the result against the expected answer.
if __name__ == '__main__':
	db = get_db('twitter')
	pipeline = make_pipeline()
	result = aggregate(db, pipeline)
	import pprint  # imported here but not used in this block
	# Exactly one document is expected, with a "followers" value of 17209.
	assert len(result) == 1
	assert result[0]["followers"] == 17209

Insert into the DB

import json

def insert_data(data, db):
	"""Insert `data` into the `arachnid` collection of `db`.

	data -- a document (dict) or a list of documents to store.
	db   -- a pymongo database handle.

	Returns None.
	"""
	# `arachnid` is the collection the calling script reads back with
	# db.arachnid.find_one(). Collection.insert matches the PyMongo API used
	# by the other snippets in this file; newer PyMongo releases spell this
	# insert_one / insert_many.
	db.arachnid.insert(data)

# Script entry point: load arachnid.json, insert it into the `examples`
# database and print one stored document.
if __name__ == "__main__":

	from pymongo import MongoClient
	client = MongoClient("mongodb://localhost:27017")
	db = client.examples

	# Parse the file first so it is closed before the database is touched.
	with open('arachnid.json') as f:
		data = json.load(f)

	insert_data(data, db)
	# A parenthesised single argument prints the same on Python 2 and 3.
	print(db.arachnid.find_one())

Preparing data

import codecs
import csv
import json
import pprint
import re

# CSV file handed to process_file() by test().
DATAFILE = 'arachnid.csv'

# Field-name mapping passed to process_file(); presumably CSV column name ->
# key used in the output entries (confirm against the data file).
FIELDS = {
    'rdf-schema#label': 'label',
    'URI': 'uri',
    'rdf-schema#comment': 'description',
    'synonym': 'synonym',
    'name': 'name',
    'family_label': 'family',
    'class_label': 'class',
    'phylum_label': 'phylum',
    'order_label': 'order',
    'kingdom_label': 'kingdom',
    'genus_label': 'genus',
}

def process_file(filename, fields):
	"""Read the CSV file `filename` and return the list of processed entries.

	filename -- path of the CSV file; its first row is used as the header.
	fields   -- dict whose keys are the CSV columns of interest.

	Returns a list. NOTE(review): the per-row processing is still a
	placeholder, so the returned list is always empty -- TODO implement.
	"""
	process_fields = fields.keys()  # columns to handle once the row loop is implemented
	data = []
	with open(filename, "r") as f:
		reader = csv.DictReader(f)
		# Skip the first three rows after the header (presumably metadata
		# rather than data -- confirm against the file). The built-in next()
		# works on Python 2 and 3, and its default keeps a file with fewer
		# than three rows from raising StopIteration.
		for _ in range(3):
			next(reader, None)

		for line in reader:
			pass  # TODO: turn `line` into an entry and append it to `data`
	return data

def parse_array(v):
	"""Split a brace-wrapped, pipe-separated string into a list of values.

	"{a| b}" becomes ["a", "b"]; each item is stripped of surrounding
	whitespace. Any other string, including the empty string, is returned
	unchanged as a one-element list.
	"""
	# startswith/endswith are safe on "", where indexing v[0] raised IndexError.
	if v.startswith("{") and v.endswith("}"):
		inner = v.lstrip("{").rstrip("}")
		return [item.strip() for item in inner.split("|")]
	return [v]

def test():
	"""Process DATAFILE and check the result against known entries."""
	data = process_file(DATAFILE, FIELDS)
	# A parenthesised single argument prints the same on Python 2 and 3.
	print("your first entry:")
	pprint.pprint(data[0])
	first_entry = {
		"synonym": None,
		"name": "Argiope",
		"classification" : {
			"kingdom":"Animal",
			"family":"Orb-weaver spider",
			"order": "Spider",
			"phylum": "Arthropod",
			"genus": None,
			"class": "Arachnid"
		},
		"uri": "http://dbpedia.org/resource/Argiope_(spider)",
		"label":"Argiope",
		"description": "The genus Argiope includes rather large and spectacular spiders that often have a strikingly coloured abdomen. These spiders are distributed throughout the world. Most countries in tropical or temperate climates host one or more species that are similar in appearance. The etymology of the name is from a Greek name meaning silver-faced."
	}
	# This must be a comparison; `assert len(data) = 76` was a syntax error.
	assert len(data) == 76
	assert data[0] == first_entry
	assert data[17]["name"] == "Ogdenia"
	assert data[48]["label"] == "Hydrachnidiae"
	assert data[14]["synonym"] == ["Cyrene Peckham & Peckham"]

# Run the checks when the file is executed as a script. The `if` line was
# missing its colon, and `test` was referenced without being called.
if __name__ == "__main__":
	test()

Range Queries

#!/usr/bin/env python
import pprint

# MongoClient was used in this snippet without being imported.
from pymongo import MongoClient

# Connection to the local MongoDB server.
client = MongoClient("mongodb://localhost:27017")

# Database used by find() in this snippet.
db = client.examples

def find():
	"""Pretty-print every city with a population above 250000, then the match count.

	Reads the module-level `db`; returns None.
	"""
	query = {"population" : {"$gt" : 250000}}

	# enumerate(..., 1) leaves num_cities at the number of documents seen;
	# it stays 0 when the cursor is empty.
	num_cities = 0
	for num_cities, c in enumerate(db.cities.find(query), 1):
		pprint.pprint(c)

	# A parenthesised single argument prints the same on Python 2 and 3.
	print("\nNumber of cities matching: %d\n" % num_cities)
from datetime import datetime

def range_query():
	"""Return the query document passed to db.cities.find().

	The query is currently empty; a fresh dict is returned on every call.
	"""
	return {}

def get_db():
	"""Return a handle to the `examples` database on the MongoDB server at localhost:27017."""
	# pymongo is imported only when a connection is actually requested.
	from pymongo import MongoClient
	return MongoClient('localhost:27017').examples

# Script entry point: run the range query and show the count and first match.
if __name__ == "__main__":
	import pprint

	db = get_db()
	query = range_query()
	cities = db.cities.find(query)

	# One pre-formatted string gives the same output on Python 2 and 3.
	print("Found cities: %s" % cities.count())
	pprint.pprint(cities[0])

Multiple Field

from pymongo import MongoClient
import pprint

# Connection to the local MongoDB server.
client = MongoClient("mongodb://localhost:27017")

# Database queried by find() in this snippet.
db = client.examples

def find():
	"""Pretty-print every document in db.autos matching both query fields.

	The query selects manufacturer "Toyota" and class "mid-size car".
	Reads the module-level `db`; returns None.
	"""
	autos = db.autos.find(
		{
			"manufacturer" : "Toyota" , "class": "mid-size car"
		})
	# The loop is part of the function body; it was indented one level too
	# deep, which is an IndentationError.
	for a in autos:
		pprint.pprint(a)

# Run the query when this file is executed as a script.
if __name__ == '__main__':
	find()
#!/usr/bin/env python
from autos import process_file

def insert_autos(infile, db):
	"""Load the records in `infile` and insert them into `db.autos`.

	infile -- path handed to process_file() (imported from the autos module).
	db     -- a pymongo database handle.

	Returns None.
	"""
	data = process_file(infile)
	# The loaded data was previously discarded and `db` was never used.
	# NOTE(review): assumes process_file returns a document or a list of
	# documents accepted by Collection.insert -- confirm against autos.py.
	db.autos.insert(data)

# Script entry point: load autos-small.csv into the `examples` database and
# print one stored document.
if __name__ == "__main__":
	from pymongo import MongoClient
	client = MongoClient("mongodb://localhost:27017")
	db = client.examples

	insert_autos('autos-small.csv', db)
	# A parenthesised single argument prints the same on Python 2 and 3.
	print(db.autos.find_one())

PyMongo

from pymongo import MongoClient
import pprint

# Connect to the local MongoDB server and select the `examples` database.
client = MongoClient('mongodb://localhost:27017/')
db = client.examples

# Example document describing one car model.
tesla_s = {
	"manufacturer": "Tesla Motors",
	"class": "full-size",
	"body style": "5-door liftback",
	"production": [2012, 2013],
	"model years": [2013],
	"layout": ["Rear-motor", "rear-wheel drive"],
	"designer": {
		"firstname": "Franz",
		"surname": "von Holzhusen",
	},
}

# Store the document, then pretty-print everything in the autos collection.
db.autos.insert(tesla_s)

for a in db.autos.find():
	pprint.pprint(a)

query

from pymongo import MongoClient
import pprint

# Connection to the local MongoDB server.
client = MongoClient('mongodb://localhost:27017/')

# Database queried by find() in this snippet.
db = client.examples

def find():
	"""Pretty-print every document in db.autos whose manufacturer is "Toyota".

	Reads the module-level `db`; returns None.
	"""
	toyota_query = { "manufacturer" : "Toyota" }
	for auto in db.autos.find(toyota_query):
		pprint.pprint(auto)

# Run the query when this file is executed as a script.
if __name__ == '__main__':
	find()
#!/usr/bin/env python

def porsche_query():
	"""Return the query document passed to find_porsche().

	The query is currently empty; a fresh dict is returned on every call.
	"""
	return {}

def get_db(db_name):
	"""Return a handle to the database named `db_name`.

	Connects to the MongoDB server at localhost:27017.
	"""
	# pymongo is imported only when a connection is actually requested.
	from pymongo import MongoClient
	return MongoClient('localhost:27017')[db_name]

def find_porsche(db, query):
	"""Run `query` against `db.autos` and return whatever find() returns."""
	autos = db.autos
	return autos.find(query)

# Script entry point: run the query and pretty-print the first three results.
if __name__ == "__main__":
	import pprint

	db = get_db('examples')
	query = porsche_query()
	results = find_porsche(db, query)

	# A parenthesised single argument prints the same on Python 2 and 3.
	print("Printing first 3 result\n")
	# `results` was misspelled `rsults` here, which raised NameError.
	for car in results[:3]:
		pprint.pprint(car)

Data modeling

api.mongodb.com

{
	"manufacturer" : "Tesla Motors",
	"class" : "full-size",
	"body style" : "5-door liftback"
}

production fields

{
	"production" : [2012, 2013],
	"model years" : [2013],
	"layout" : ["Rear-motor", "rear-wheel drive"]
}

designer

{
	"designer" : {
		"firstname" : "Franz",
		"surname" : "von Holzhusen"
	}
}
{
	"assembly" : [
		{
			"country" : "United States",
			"city" : "Fremont",
			"state" : "California"
		},
		{
			"country" : "The Netherlands",
			"city" : "Tilburg"
		}
	]
}
{
	"manufacturer" : "Tesla Motors",
	"class" : "full-size",
	"body style" : "5-door liftback",
	"production" : [2012, 2013],
	"model years" : [2013],
	"layout" : ["Rear-motor", "rear-wheel drive"],
	"designer" : {
		"firstname" : "Franz",
		"surname" : "von Holzhusen"
	},
	"assembly" : [
		{
			"country" : "United States",
			"city" : "Fremont",
			"state" : "California"
		}
	]
}
def add_city(db):
	"""Insert a document named "Chicago" into db.cities. Returns None."""
	chicago = {"name": "Chicago"}
	db.cities.insert(chicago)

def get_city(db):
	"""Return whatever db.cities.find_one() yields."""
	cities = db.cities
	return cities.find_one()

def get_db():
	"""Return a handle to the `examples` database on the MongoDB server at localhost:27017."""
	# pymongo is imported only when a connection is actually requested.
	from pymongo import MongoClient
	return MongoClient('localhost:27017').examples

# Script entry point: add a city and print one document from the collection.
if __name__ == "__main__":
	# `db` was never assigned in this snippet; obtain it from get_db() first.
	db = get_db()
	add_city(db)
	# A parenthesised single argument prints the same on Python 2 and 3.
	print(get_city(db))

Fixing the area

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import codecs
import csv
import json
import pprint

# CSV file that test() passes to process_file().
CITIES = 'cities.csv'

def fix_area(area):
	"""Return the cleaned-up form of an "areaLand" value.

	Currently a placeholder: the value is handed back unchanged.
	"""
	fixed = area
	return fixed

def process_file(filename):
	"""Read the CSV file `filename` and return its rows with "areaLand" fixed.

	filename -- path of the CSV file; its first row is used as the header.

	Returns a list of row dicts. Where a row has an "areaLand" key, its
	value is replaced by fix_area(value).
	"""
	data = []

	with open(filename, "r") as f:
		reader = csv.DictReader(f)

		# Skip the first three rows after the header (presumably metadata
		# rather than data -- confirm against the file). The built-in next()
		# works on Python 2 and 3, and its default keeps a file with fewer
		# than three rows from raising StopIteration.
		for _ in range(3):
			next(reader, None)

		for line in reader:
			if "areaLand" in line:
				line["areaLand"] = fix_area(line["areaLand"])
			# Rows were previously fixed and then dropped, so the function
			# always returned an empty list.
			data.append(line)

	return data

def test():
	"""Process CITIES, print three sample "areaLand" values and check known rows."""
	data = process_file(CITIES)

	# A parenthesised single argument prints the same on Python 2 and 3.
	print("Printing three example results:")
	for n in range(5, 8):
		pprint.pprint(data[n]["areaLand"])

	# Identity is the idiomatic test for None.
	assert data[3]["areaLand"] is None
	assert data[8]["areaLand"] == 55166700.0
	assert data[20]["areaLand"] == 14581600.0
	assert data[33]["areaLand"] == 20564500.0

# Run the checks when this file is executed as a script.
if __name__ == "__main__":
	test()

Data Quality

import codecs
import csv
import json
import pprint

# CSV file that test() passes to audit_file().
CITIES = 'cities.csv'

# Field names that test() passes to audit_file().
FIELDS = [
    "name",
    "timeZone_label",
    "utcOffset",
    "homepage",
    "governmentType_label",
    "isPartOf_label",
    "areaCode",
    "populationTotal",
    "elevation",
    "maximumElevation",
    "minimumElevation",
    "populationDensity",
    "wgs84_pos#lat",
    "wgs84_pos#long",
    "areaLand",
    "areaMetro",
    "areaUrban",
]

def audit_file(filename, fields):
	"""Return a dict of field types for `filename`.

	Currently a placeholder: neither argument is read and the returned
	dict is always empty.
	"""
	return {}

def test():
	"""Audit CITIES, print the result and check the types seen for two fields."""
	fieldtypes = audit_file(CITIES, FIELDS)

	pprint.pprint(fieldtypes)

	# Expected sets of Python types observed for each field.
	none_type = type(None)
	expected_land = set((float, list, none_type))
	expected_metro = set((float, none_type))

	assert fieldtypes["areaLand"] == expected_land
	assert fieldtypes['areaMetro'] == expected_metro

# Run the checks when this file is executed as a script.
if __name__ =="__main__":
	test()