import pandas as pd


def add_one(x):
    """Return ``x + 1``; used by the ``Series.apply`` demonstration below."""
    return x + 1


# Change False to True to see Series.apply() call add_one on every element.
if False:
    s = pd.Series([1, 2, 3, 4, 5])
    print(s.apply(add_one))

names = pd.Series([
    'Andre Agassi',
    'Barry Bonds',
    'Christopher Columbus',
    'Daniel Defoe',
    'Emilio Estevez',
    'Fred Flintstone',
    'Greta Garbo',
    'Humbert Humbert',
    'Ivan Ilych',
    'James Joyce',
    'Keira Knightley',
    'Lois Lane',
    'Mike Myers',
    'Nick Nolte',
    'Ozzy Osbourne',
    'Pablo Picasso',
    'Quirinus Quirrell',
    'Rachael Ray',
    'Susan Sarandon',
    'Tina Turner',
    'Ugueth Urbina',
    'Vince Vaughn',
    'Woodrow Wilson',
    'Yoji Yamada',
    'Zinedine Zidane',
])


def reverse_names(names):
    """Return a new Series with every name rewritten as "Lastname, Firstname".

    Args:
        names: pandas Series of strings shaped like "Firstname Lastname"
            (words separated by a single space).

    Returns:
        A new Series with the same index; the input Series is not modified.

    Raises:
        IndexError: if an entry contains no space (there is no second word).
    """
    def reverse_name(name):
        # Only the first two space-separated words are used.
        split_name = name.split(" ")
        first_name = split_name[0]
        last_name = split_name[1]
        return last_name + ', ' + first_name

    # The original body referenced an undefined `name`; the per-name logic
    # has to be mapped over the Series instead.
    return names.apply(reverse_name)
Vectorized Operations
vector [1, 2, 3] * scalar 3 = [1, 2, 3, 1, 2, 3, 1, 2, 3], [3, 6, 9], or an error
these are reasonable answers
More vectorized operations
math operation
add, subtract, multiply, divide, exponentiate
Logical operations
&, |, ~
Comparison operations
>, >=, <, <=, ==, !=
import numpy as np

# Change any "if False:" to "if True:" to run that demonstration.
# print(...) with a single argument behaves the same on Python 2 and 3.

# Arithmetic between two arrays of the same length is element-wise.
if False:
    a = np.array([1, 2, 3, 4])
    b = np.array([1, 2, 1, 2])

    print(a + b)
    print(a - b)  # was an en dash, which is not a Python operator
    print(a * b)
    print(a / b)
    print(a ** b)

# Arithmetic between an array and a scalar applies the scalar to each element.
if False:  # the colon was missing here
    a = np.array([1, 2, 3, 4])
    b = 2

    print(a + b)
    print(a - b)  # was an en dash, which is not a Python operator
    print(a * b)
    print(a / b)
    print(a ** b)

# Comparisons between two arrays are element-wise and yield boolean arrays.
if False:
    a = np.array([1, 2, 3, 4, 5])
    b = np.array([5, 4, 3, 2, 1])

    print(a > b)
    print(a >= b)
    print(a < b)
    print(a <= b)
    print(a == b)
    print(a != b)
code snippet
import numpy as np

# `b = a` binds a second name to the same array object; no copy is made.
a = np.array([1, 2, 3, 4])
b = a

# `+=` modifies the existing array in place, so the change is visible
# through `b` as well. (`a = a + ...` would instead rebind `a` to a new
# array and leave `b` untouched.)
a += np.array([1, 1, 1, 1])

# Prints [2 3 4 5]. The parenthesized form runs on both Python 2 and 3.
print(b)
+= operates in-place while + does not
import pandas
import pandas as pd  # the demos below use the `pd` alias, which was never bound

# Change any "if False:" to "if True:" to run that demonstration.
# Adding two Series matches values by index label.
#
# NOTE(review): all four demonstrations below are identical. Presumably each
# was meant to give s2 a different index (reordered, overlapping, disjoint)
# -- confirm against the lesson material before changing them.

if False:
    s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    # `index[...]` was missing its `=` and would have looked up an
    # undefined name `index`.
    s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
    print(s1 + s2)

if False:
    s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
    print(s1 + s2)

if False:
    s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
    print(s1 + s2)

if False:
    s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    s2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
    print(s1 + s2)
NumPy Array
import numpy as np

# Two parallel arrays: employment[i] is the employment value for countries[i].
countries = np.array([
    'Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina',
    'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas',
    'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
    'Belize', 'Benin', 'Bhutan', 'Bolivia',
    'Bosnia and Herzegovina',
])

employment = np.array([
    55.70000076, 51.40000153, 50.5, 75.69999695,
    58.40000153, 40.09999847, 61.5, 57.09999847,
    60.90000153, 66.59999847, 60.40000153, 68.09999847,
    66.90000153, 53.40000153, 48.59999847, 56.79999924,
    71.59999847, 58.40000153, 70.40000153, 41.20000076,
])

# Change any "if False:" to "if True:" to run that demonstration.

# Accessing elements
if False:
    print(countries[0])
    print(countries[3])

# Slicing
if False:
    print(countries[0:3])
    print(countries[:3])
    print(countries[17:])
    print(countries[:])

# Element types
if False:
    print(countries.dtype)
    print(employment.dtype)
    print(np.array([0, 1, 2, 3]).dtype)
    print(np.array([1.0, 1.5, 2.0, 2.5]).dtype)
    print(np.array([True, False, True]).dtype)
    print(np.array(['AL', 'AK', 'AZ', 'AR', 'CA']).dtype)

# Looping
if False:
    for country in countries:
        print('Examining country {}'.format(country))

    # Walk both arrays in lockstep instead of indexing by position.
    for country, country_employment in zip(countries, employment):
        print('Country {} has employment {}'.format(
            country, country_employment))

# NumPy reductions
if False:
    print(employment.mean())
    print(employment.std())
    print(employment.max())
    print(employment.sum())


def max_employment(countries, employment):
    """Return the country with the highest employment and that value.

    Args:
        countries: sequence of country names.
        employment: sequence of numbers, parallel to ``countries``
            (``employment[i]`` belongs to ``countries[i]``).

    Returns:
        A ``(max_country, max_value)`` tuple. When several entries share the
        maximum, the first one is returned. For empty input the result is
        ``(None, None)``, which is what the unfinished stub always returned.
    """
    if len(employment) == 0:
        return (None, None)

    # argmax gives the position of the maximum, which indexes both sequences.
    i = np.argmax(employment)
    max_country = countries[i]
    max_value = employment[i]
    return (max_country, max_value)
pandas and NumPy
Gapminder Data
-employment levels
-life expectancy
-GDP
-School Completion Rates
import pandas as pd

# Load the full daily-engagement table from disk into a DataFrame.
daily_engagement = pd.read_csv('daily_engagement_full.csv')

# Count the distinct values in the 'acct' column. This is a bare expression:
# a notebook displays its value, a plain script discards it.
len(pd.unique(daily_engagement['acct']))
One-dimensional data structures
pandas, NumPy (Numerical Python)
Series -> built on Array
more features, simpler
Making histograms in python
# Sample values whose distribution is plotted below.
data = [1,2,1,3,3,1,4,2]

# IPython magic command: valid only inside an IPython/Jupyter session,
# not in a plain Python script.
%matplotlib inline
import matplotlib.pyplot as plt

# Draw a histogram of `data`.
plt.hist(data)
Lots of different pieces of information to look at
These features can interact
# Label the axes and title the plot. The string delimiters were curly
# "smart" quotes, which Python rejects; plain double quotes are required.
plt.xlabel("label for x axis")
plt.ylabel("label for y axis")
plt.title("title of plot")
lesson completed
from collections import defaultdict

import numpy as np

# Group the records of paid_engagement_in_first_week by their account key.
# (The dict was created under the misspelled name `engagemnt_by_account`
# and then used as `engagement_by_account`, which raised NameError.)
engagement_by_account = defaultdict(list)
for engagement_record in paid_engagement_in_first_week:
    account_key = engagement_record['account_key']
    engagement_by_account[account_key].append(engagement_record)

# Sum 'total_minutes_visited' over each account's records.
# (The loop variable was misspelled `engegement_for_student`.)
total_minutes_by_account = {}
for account_key, engagement_for_student in engagement_by_account.items():
    total_minutes_by_account[account_key] = sum(
        engagement_record['total_minutes_visited']
        for engagement_record in engagement_for_student
    )

# Materialize as a list: on Python 3 `.values()` is a view object, which
# NumPy reductions do not treat as a sequence of numbers.
total_minutes = list(total_minutes_by_account.values())
Tracking Down
# Count enrollments whose account key is absent from
# unique_engagement_students even though the join and cancel dates differ.
num_problem_students = 0
for enrollment in enrollments:
    student = enrollment['account_key']
    if (student not in unique_engagement_students
            and enrollment['join_date'] != enrollment['cancel_date']):
        # The counter was misspelled `num_problem_student`, which raised
        # NameError on the first matching enrollment.
        num_problem_students += 1

# Bare expression: a notebook displays the count, a plain script discards it.
num_problem_students
import numpy
import numpy as np  # the code below uses the `np` alias, which was never bound


def within_one_week(join_date, engagement_date):
    """Return True when ``engagement_date`` is fewer than 7 days after ``join_date``.

    Args:
        join_date: date/datetime-like value supporting subtraction.
        engagement_date: value of the same kind as ``join_date``.

    Returns:
        bool: whether ``(engagement_date - join_date).days < 7``.

    NOTE(review): an engagement dated before the join date gives a negative
    day count and therefore also returns True -- confirm whether a lower
    bound (``days >= 0``) is wanted.
    """
    time_delta = engagement_date - join_date
    return time_delta.days < 7


def remove_free_trial_cancels(data):
    """Return the records of ``data`` whose 'account_key' is in ``paid_students``.

    Args:
        data: iterable of dict-like records, each with an 'account_key' entry.

    Returns:
        A new list, in the original order; ``data`` itself is not modified.
    """
    return [
        data_point
        for data_point in data
        if data_point['account_key'] in paid_students
    ]


# Materialize as a list: on Python 3 `.values()` is a view object, which
# NumPy reductions do not treat as a sequence of numbers.
total_minutes = list(total_minutes_by_account.values())

# Single-argument print(...) produces the same output on Python 2 and 3.
# The first line previously lacked its comma and called `np.pean`.
print('Mean: {}'.format(np.mean(total_minutes)))
print('Standard deviation: {}'.format(np.std(total_minutes)))
print('Minimum: {}'.format(np.min(total_minutes)))
print('Maximum: {}'.format(np.max(total_minutes)))

# Find the account with the largest total. The loop variable is named
# `minutes` so it no longer overwrites the `total_minutes` list above.
student_with_max_minutes = None
max_minutes = 0
for student, minutes in total_minutes_by_account.items():
    if minutes > max_minutes:
        max_minutes = minutes
        student_with_max_minutes = student
CSVs
import unicodecsv

# NOTE(review): this name is never read below; the paths are hard-coded.
enrollments_filename = ''

# Read each CSV into a list of per-row dicts keyed by the header row.
# The files are opened in binary mode ('rb') and handed to unicodecsv.
with open('enrollments.csv', 'rb') as f:
    reader = unicodecsv.DictReader(f)
    # Was misspelled `enrollements`.
    enrollments = list(reader)

with open('daily_engagement.csv', 'rb') as f:
    reader = unicodecsv.DictReader(f)
    daily_engagement = list(reader)

with open('project_submissions.csv', 'rb') as f:
    reader = unicodecsv.DictReader(f)
    project_submissions = list(reader)

# The trailing `daily_engagement = None` / `project_submissions = None`
# placeholder assignments were removed: they discarded the tables that had
# just been loaded.
import unicodecsv


def read_csv(filename):
    """Read a CSV file and return its rows as a list of dicts.

    Args:
        filename: path of the CSV file; it is opened in binary mode and
            parsed with ``unicodecsv.DictReader``.

    Returns:
        list: one dict per data row, keyed by the header row's column names.
    """
    with open(filename, 'rb') as f:
        reader = unicodecsv.DictReader(f)
        return list(reader)


# The filenames were empty strings, so every open('') call failed. These are
# the files the longer, inline version of this loading code reads.
enrollments = read_csv('enrollments.csv')
daily_engagement = read_csv('daily_engagement.csv')
project_submissions = read_csv('project_submissions.csv')

# Row counts follow directly from the loaded lists.
enrollment_num_rows = len(enrollments)
engagement_num_rows = len(daily_engagement)
submission_num_rows = len(project_submissions)

# TODO: still placeholders. Computing these needs the name of the
# student-key column in each table, which should be confirmed first.
enrollment_num_unique_students = 0
engagement_num_unique_students = 0
submission_num_unique_students = 0
Google and iOS Design
Google Design
https://design.google.com/
Apple accessibility
https://developer.apple.com/accessibility/ios/
W3C Web Content Accessibility Guidelines (WCAG)
https://www.w3.org/TR/WCAG20/
Key thing is accessibility
-> try to make product accessible and useful
mobile
-> touch screen
-> color contrast ratio
Implement P controller
import random
import numpy as np
import matplotlib.pyplot as plt
class Robot(object):
    """A simple 2-D vehicle with a position, a heading and noisy motion.

    Attributes are plain floats: ``x``/``y`` position, ``orientation`` in
    radians, ``length`` (the divisor in the turn formula used by ``move``),
    ``steering_noise``/``distance_noise`` (standard deviations of the
    Gaussian noise added in ``move``) and ``steering_drift`` (a constant
    offset added to every steering command).
    """

    def __init__(self, length=20.0):
        """Create a robot at the origin, heading 0, with no noise or drift."""
        self.x = 0.0
        self.y = 0.0
        self.orientation = 0.0
        self.length = length
        self.steering_noise = 0.0
        self.distance_noise = 0.0
        self.steering_drift = 0.0

    def set(self, x, y, orientation):
        """Set the pose; ``orientation`` is wrapped into [0, 2*pi)."""
        self.x = x
        self.y = y
        self.orientation = orientation % (2.0 * np.pi)

    def set_noise(self, steering_noise, distance_noise):
        """Set the standard deviations of the steering and distance noise."""
        self.steering_noise = steering_noise
        self.distance_noise = distance_noise

    def set_steering_drift(self, drift):
        """Set the constant offset added to every steering command."""
        self.steering_drift = drift

    def move(self, steering, distance, tolerance=0.001,
             max_steering_angle=np.pi / 4.0):
        """Advance the robot by one motion step, updating its pose in place.

        Args:
            steering: requested steering angle in radians; clamped to
                ``[-max_steering_angle, max_steering_angle]``.
            distance: requested travel distance; negative values are
                treated as 0.
            tolerance: when the absolute heading change is below this value
                the step is treated as straight-line motion.
            max_steering_angle: largest steering magnitude allowed.
        """
        # Clamp the commands to their allowed ranges.
        steering = max(-max_steering_angle, min(max_steering_angle, steering))
        distance = max(0.0, distance)

        # Apply Gaussian noise, then the systematic steering drift.
        steering2 = random.gauss(steering, self.steering_noise)
        distance2 = random.gauss(distance, self.distance_noise)
        steering2 += self.steering_drift

        # Heading change produced by this step.
        turn = np.tan(steering2) * distance2 / self.length

        if abs(turn) < tolerance:
            # Approximately straight: move along the current heading.
            self.x += distance2 * np.cos(self.orientation)
            self.y += distance2 * np.sin(self.orientation)
            self.orientation = (self.orientation + turn) % (2.0 * np.pi)
        else:
            # Move along a circular arc around the centre (cx, cy).
            radius = distance2 / turn
            cx = self.x - (np.sin(self.orientation) * radius)
            cy = self.y + (np.cos(self.orientation) * radius)
            # Fixed: `self.orietation` and `raidus` were typos, and the
            # wrap used `2.0 + np.pi` instead of a full turn `2.0 * np.pi`.
            self.orientation = (self.orientation + turn) % (2.0 * np.pi)
            self.x = cx + (np.sin(self.orientation) * radius)
            self.y = cy - (np.cos(self.orientation) * radius)

    def __repr__(self):
        """Return the pose formatted as '[x=... y=... orient=...]'."""
        return '[x=%.5f y=%.5f orient=%.5f]' % (
            self.x, self.y, self.orientation)
[/python]