Pandas Series apply()

import pandas as pd

# Disabled demo: Series.apply() calls a function on every element of the
# Series and collects the results.
if False:
	s = pd.Series([1, 2, 3, 4, 5])

	def add_one(x):
		"""Return x plus one."""
		return x + 1

	# Single-argument print with parentheses is valid on Python 2 and 3.
	print(s.apply(add_one))

# Sample "First Last" names, one for each initial from A to Z except X.
names = pd.Series([
    'Andre Agassi', 'Barry Bonds', 'Christopher Columbus',
    'Daniel Defoe', 'Emilio Estevez', 'Fred Flintstone',
    'Greta Garbo', 'Humbert Humbert', 'Ivan Ilych',
    'James Joyce', 'Keira Knightley', 'Lois Lane',
    'Mike Myers', 'Nick Nolte', 'Ozzy Osbourne',
    'Pablo Picasso', 'Quirinus Quirrell', 'Rachael Ray',
    'Susan Sarandon', 'Tina Turner', 'Ugueth Urbina',
    'Vince Vaughn', 'Woodrow Wilson', 'Yoji Yamada',
    'Zinedine Zidane',
])

def reverse_names(names):
	"""Return a Series in which each "First Last" entry becomes "Last, First".

	names -- pandas Series of strings; each string is split on single
	spaces and must contain at least two parts.

	The conversion is applied element by element with Series.apply(), so
	the returned Series has the same index as the input.
	"""
	def reverse_name(name):
		# Part 0 is taken as the first name and part 1 as the last name.
		split_name = name.split(" ")
		first_name = split_name[0]
		last_name = split_name[1]
		return last_name + ', ' + first_name

	return names.apply(reverse_name)

Vectorized Operations

vector [1 2 3] * scalar 3 = [1 2 3 1 2 3 1 2 3], [3 6 9], or an error
These are all reasonable answers

More vectorized operation
math operation
add, subtract, multiply, divide, exponentiate

Logical operations
&, |, ~

Comparison operations
>, >=, <, <=, ==, !=

import numpy as np

# Disabled demos of element-wise NumPy operators; set a guard to True to run it.

# Arithmetic between two arrays of equal length.
if False:
	a = np.array([1, 2, 3, 4])
	b = np.array([1, 2, 1, 2])

	print(a + b)
	print(a - b)
	print(a * b)
	print(a / b)
	print(a ** b)

# Arithmetic between an array and a scalar.
if False:
	a = np.array([1, 2, 3, 4])
	b = 2

	print(a + b)
	print(a - b)
	print(a * b)
	print(a / b)
	print(a ** b)

# Comparisons between two arrays of equal length.
if False:
	a = np.array([1, 2, 3, 4, 5])
	b = np.array([5, 4, 3, 2, 1])

	print(a > b)
	print(a >= b)
	print(a < b)
	print(a <= b)
	print(a == b)
	print(a != b)

code snippet
import numpy as np

a = np.array([1, 2, 3, 4])
b = a  # b is another name for the same array object, not a copy
a += np.array([1, 1, 1, 1])  # += updates that shared array in place
print(b)  # b therefore shows the incremented values

+= operates in-place while + does not

import pandas as pandas

# Disabled demos of adding two Series that carry string indexes.
# NOTE(review): the four demos below are identical; they were probably meant
# to use different indexes for s2 -- confirm against the lesson material.
if False:
	s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
	s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
	print(s1 + s2)

if False:
	s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
	s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
	print(s1 + s2)

if False:
	s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
	s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
	print(s1 + s2)

if False:
	s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
	s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
	print(s1 + s2)

NumPy Array

import numpy as np

# Names of the first 20 countries in the sample.
countries = np.array([
    'Afghanistan', 'Albania', 'Algeria', 'Angola',
    'Argentina', 'Armenia', 'Australia', 'Austria',
    'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
    'Barbados', 'Belarus', 'Belgium', 'Belize',
    'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina',
])

# Employment value for each country; employment[i] belongs to countries[i].
employment = np.array([
    55.70000076, 51.40000153, 50.5, 75.69999695, 58.40000153,
    40.09999847, 61.5, 57.09999847, 60.90000153, 66.59999847,
    60.40000153, 68.09999847, 66.90000153, 53.40000153, 48.59999847,
    56.79999924, 71.59999847, 58.40000153, 70.40000153, 41.20000076,
])

# Disabled demos of basic NumPy array features; set a guard to True to run it.

# Accessing single elements by position.
if False:
	print(countries[0])
	print(countries[3])

# Slicing.
if False:
	print(countries[0:3])
	print(countries[:3])
	print(countries[17:])
	print(countries[:])

# Element types (dtype) of several arrays.
if False:
	print(countries.dtype)
	print(employment.dtype)
	print(np.array([0, 1, 2, 3]).dtype)
	print(np.array([1.0, 1.5, 2.0, 2.5]).dtype)
	print(np.array([True, False, True]).dtype)
	print(np.array(['AL', 'AK', 'AZ', 'AR', 'CA']).dtype)

# Looping over one array, then over two arrays in step.
if False:
	for country in countries:
		print('Examining country {}'.format(country))

	for country, country_employment in zip(countries, employment):
		print('Country {} has employment {}'.format(country,
			country_employment))

# Built-in summary methods.
if False:
	print(employment.mean())
	print(employment.std())
	print(employment.max())
	print(employment.sum())

def max_employment(countries, employment):
	"""Return the country with the highest employment together with that value.

	countries  -- sequence (e.g. NumPy array) of country names.
	employment -- sequence of employment values of the same length, where
	employment[i] belongs to countries[i].

	Returns a (max_country, max_value) tuple, or (None, None) when
	employment is empty.
	"""
	if len(employment) == 0:
		return (None, None)

	# argmax gives the position of the largest value (the first one on ties).
	max_index = np.argmax(employment)
	max_country = countries[max_index]
	max_value = employment[max_index]

	return (max_country, max_value)

pandas and NumPy

Gapminder Data
-employment levels
-life expectancy
-GDP
-School Completion Rates

import pandas as pd

# Load the full engagement table, then count the distinct values found in
# its 'acct' column.
daily_engagement = pd.read_csv('daily_engagement_full.csv')
len(pd.unique(daily_engagement['acct']))

One-dimensional data structures
Pandas, NumPy (Numerical Python)
Series -> built on Array
more features, simpler

Making histograms in python

# Sample values to plot.
data = [1,2,1,3,3,1,4,2]

# IPython/Jupyter magic (not plain Python syntax): show figures inline.
%matplotlib inline
import matplotlib.pyplot as plt
# Draw a histogram of the values in data.
plt.hist(data)

Lots of different pieces of information to look at
These features can interact

# String literals need straight ASCII quotes; typographic quotes do not parse.
plt.xlabel("label for x axis")
plt.ylabel("label for y axis")
plt.title("title of plot")

lesson completed

from collections import defaultdict

# Group the records of paid_engagement_in_first_week by their account key.
engagement_by_account = defaultdict(list)
for engagement_record in paid_engagement_in_first_week:
	account_key = engagement_record['account_key']
	engagement_by_account[account_key].append(engagement_record)

# For every account, add up 'total_minutes_visited' over all of its records.
total_minutes_by_account = {}

for account_key, engagement_for_student in engagement_by_account.items():
	total_minutes = 0
	for engagement_record in engagement_for_student:
		total_minutes += engagement_record['total_minutes_visited']
	total_minutes_by_account[account_key] = total_minutes

# list() gives a real list on Python 3 as well, where values() is only a view.
total_minutes = list(total_minutes_by_account.values())

import numpy as np

Tracking Down

# Count enrollments whose account key is missing from
# unique_engagement_students and whose join and cancel dates differ.
num_problem_students = 0

for enrollment in enrollments:
	student = enrollment['account_key']
	# Parentheses let the condition continue onto a second line.
	if (student not in unique_engagement_students
			and enrollment['join_date'] != enrollment['cancel_date']):
		num_problem_students += 1

num_problem_students
def within_one_week(join_date, engagement_date):
	"""Return True if engagement_date falls in the 7 days starting at join_date.

	join_date, engagement_date -- values whose difference has a .days
	attribute (e.g. datetime objects).

	The difference must be at least 0 and fewer than 7 whole days, so an
	engagement dated before the join date does not count.
	"""
	time_delta = engagement_date - join_date
	return 0 <= time_delta.days < 7

def remove_free_trial_cancels(data):
	"""Return the records of data whose 'account_key' is in paid_students.

	data -- iterable of records that can be indexed with 'account_key'.

	Every record is examined and the kept ones are returned in their
	original order; an empty input gives an empty list.
	"""
	return [data_point for data_point in data
			if data_point['account_key'] in paid_students]

# list() gives NumPy a real sequence on Python 3 too, where values() is a view.
total_minutes = list(total_minutes_by_account.values())

import numpy as np

# Summary statistics of the per-account totals.
print('Mean: {}'.format(np.mean(total_minutes)))
print('Standard deviation: {}'.format(np.std(total_minutes)))
print('Minimum: {}'.format(np.min(total_minutes)))
print('Maximum: {}'.format(np.max(total_minutes)))

# Find the account with the largest total; both results keep their initial
# values (None and 0) when no total is greater than 0.
student_with_max_minutes = None
max_minutes = 0

# The loop variable is called `minutes` so that it does not overwrite the
# module-level total_minutes collection.
for student, minutes in total_minutes_by_account.items():
	if minutes > max_minutes:
		max_minutes = minutes
		student_with_max_minutes = student

CSVs

import unicodecsv
enrollments_filename=''

# Each file is opened in binary mode and read with unicodecsv.DictReader;
# list() reads all rows before the file is closed.
with open('enrollments.csv', 'rb') as f:
	reader = unicodecsv.DictReader(f)
	enrollments = list(reader)

with open('daily_engagement.csv', 'rb') as f:
	reader = unicodecsv.DictReader(f)
	daily_engagement = list(reader)

with open('project_submissions.csv', 'rb') as f:
	reader = unicodecsv.DictReader(f)
	project_submissions = list(reader)

# Rebind both names to None, discarding whatever they referred to before.
daily_engagement = None
project_submissions = None
import unicodecsv

def read_csv(filename):
	"""Read the CSV file at filename and return all of its rows as a list.

	The file is opened in binary mode and parsed with
	unicodecsv.DictReader; the list holds one entry per row yielded by
	that reader.
	"""
	with open(filename, 'rb') as csv_file:
		return list(unicodecsv.DictReader(csv_file))

# An empty path cannot be opened, so each call names the CSV file that
# holds the corresponding table.
enrollments = read_csv('enrollments.csv')
daily_engagement = read_csv('daily_engagement.csv')
project_submissions = read_csv('project_submissions.csv')

# Row and unique-student counters for the three tables, all starting at zero.
enrollment_num_rows = enrollment_num_unique_students = 0
engagement_num_rows = engagement_num_unique_students = 0
submission_num_rows = submission_num_unique_students = 0

Google and iOS Design

Google Design
https://design.google.com/

Apple accessibility
https://developer.apple.com/accessibility/ios/

W3C Web Content Accessibility Guidelines (WCAG)
https://www.w3.org/TR/WCAG20/

Key thing is accessibility
-> try to make product accessible and useful

mobile
-> touch screen
-> color contrast ratio

Implement P controller

import random
import numpy as np
import matplotlib.pyplot as plt

class Robot(object):
    """Simulated robot with a 2-D position, a heading and optional noise/drift."""

    def __init__(self, length=20.0):
        """Create a robot at x=0, y=0, orientation=0 with no noise or drift.

        length -- stored as self.length; move() divides by it when computing
        the turn angle (presumably the robot's wheelbase -- confirm).
        """
        self.x = 0.0
        self.y = 0.0
        self.orientation = 0.0
        self.length = length
        self.steering_noise = 0.0
        self.distance_noise = 0.0
        self.steering_drift = 0.0

    def set(self, x, y, orientation):
        """Set the position and the heading; the heading is wrapped into [0, 2*pi)."""
        self.x = x
        self.y = y
        self.orientation = orientation % (2.0 * np.pi)

    def set_noise(self, steering_noise, distance_noise):
        """Set the standard deviations used for steering and distance noise."""
        self.steering_noise = steering_noise
        self.distance_noise = distance_noise

    def set_steering_drift(self, drift):
        """Set the constant offset that move() adds to every steering angle."""
        self.steering_drift = drift

    def move(self, steering, distance, tolerance=0.001, max_steering_angle=np.pi/4.0):
        """Advance the robot by one motion step, updating x, y and orientation.

        steering           -- requested steering angle; clamped to
                              [-max_steering_angle, max_steering_angle].
        distance           -- requested travel distance; negative values are
                              treated as 0.
        tolerance          -- turns smaller than this (in absolute value) are
                              handled as straight-line motion.
        max_steering_angle -- clamp limit for steering.
        """
        if steering > max_steering_angle:
            steering = max_steering_angle
        if steering < -max_steering_angle:
            steering = -max_steering_angle
        if distance < 0.0:
            distance = 0.0

        # Draw the actually executed steering and distance from Gaussians
        # centred on the requested values.
        steering2 = random.gauss(steering, self.steering_noise)
        distance2 = random.gauss(distance, self.distance_noise)

        # The drift is added after the noise has been applied.
        steering2 += self.steering_drift

        # Change in heading produced by this step.
        turn = np.tan(steering2) * distance2 / self.length

        if abs(turn) < tolerance:
            # Nearly straight: move along the current heading.
            self.x += distance2 * np.cos(self.orientation)
            self.y += distance2 * np.sin(self.orientation)
            self.orientation = (self.orientation + turn) % (2.0 * np.pi)
        else:
            # Turning: rotate about the circle centre (cx, cy).
            radius = distance2 / turn
            cx = self.x - (np.sin(self.orientation) * radius)
            cy = self.y + (np.cos(self.orientation) * radius)
            self.orientation = (self.orientation + turn) % (2.0 * np.pi)
            self.x = cx + (np.sin(self.orientation) * radius)
            self.y = cy - (np.cos(self.orientation) * radius)

    def __repr__(self):
        """Return the position and heading formatted with five decimals."""
        return '[x=%.5f y=%.5f orient=%.5f]' % (self.x, self.y, self.orientation)