python for stock data

features:
1. strong scientific libraries
2. strongly maintained
3. fast

Install pandas on CentOS

$ sudo easy_install pandas

>>> import numpy
>>> numpy.version.version
'1.13.3'

Print last 5 rows of the data frame

import pandas as pd


def test_run():
    """Load the AAPL price history and print its final five rows."""
    prices = pd.read_csv("data/AAPL.csv")
    print(prices.tail(5))


if __name__ == "__main__":
    test_run()

compute max closing price of Apple and IBM

import pandas as pd

def get_max_close(symbol):
    """Return the maximum closing price recorded for a stock symbol.

    Reads ``data/<symbol>.csv`` (relative to the current working directory)
    and returns the largest value in its ``Close`` column.
    """
    df = pd.read_csv("data/{}.csv".format(symbol))
    return df['Close'].max()

def test_run():
    """Print the maximum closing price for each symbol of interest."""
    for symbol in ['AAPL', 'IBM']:
        # print() calls instead of Python 2 print statements, matching the
        # other snippets in these notes.
        print("Max close")
        print(symbol, get_max_close(symbol))


if __name__ == "__main__":  # if run standalone
    test_run()

Mapper

def mapper():
    """Emit one ``word<TAB>1`` record per word read from standard input.

    Each line is split on whitespace; every token has ASCII punctuation
    removed and is lower-cased before being printed. Tokens that consist
    only of punctuation are skipped so that no record with an empty key is
    produced.
    """
    import string
    import sys

    # Build the deletion table once instead of once per token.
    strip_punctuation = str.maketrans("", "", string.punctuation)

    for line in sys.stdin:
        for token in line.split():
            cleaned_data = token.translate(strip_punctuation).lower()
            if cleaned_data:
                print("{0}\t{1}".format(cleaned_data, 1))


# Run the mapper only when executed as a script, not when imported.
if __name__ == "__main__":
    mapper()

Reduce stage -> reducer

import sys

def reducer():
    """Sum the counts of consecutive identical keys read from standard input.

    Input lines are expected in the form ``key<TAB>count`` with identical
    keys adjacent to each other (i.e. the input is sorted by key). For each
    run of identical keys one ``key<TAB>total`` line is printed. Lines that
    do not split into exactly two tab-separated fields are ignored.

    Raises ValueError if a count field is not an integer.
    """
    word_count = 0
    old_key = None

    for line in sys.stdin:
        data = line.strip().split("\t")

        # Skip malformed records.
        if len(data) != 2:
            continue

        this_key, count = data

        # The key changed: the previous run is complete, so flush its total.
        if old_key is not None and old_key != this_key:
            print("{0}\t{1}".format(old_key, word_count))
            word_count = 0

        old_key = this_key
        word_count += int(count)

    # Flush the final run, which has no following key to trigger the flush.
    if old_key is not None:
        print("{0}\t{1}".format(old_key, word_count))


# Run the reducer only when executed as a script, not when imported.
if __name__ == "__main__":
    reducer()
#! /bin/bash
# Run the word-count job locally: the text is piped through the mapper, the
# mapper's output is sorted so that identical keys end up on adjacent lines,
# and the sorted stream is piped into the reducer.

cat ../../data/aliceInWorderland.txt | python word_count_mapper.py | sort | python word_count_reducer.py

Groundwork

<!-- Sample page: a title followed by two div sections, each identified by an id. -->
<title>My Website</title>
<!-- First section: a single paragraph of text. -->
<div id="introduction">
	<p>
	welcome to my website!
	</p>
</div>
<!-- Second section: a paragraph holding text, an image and a link. -->
<div id="image-gallery">
	<p>
	This is my cat!
	<img src="cat.jpg" alt="Meow!">
	<a href="https://en.wikipedia.org/wiki/Cat">Learn more about cats!</a>
	</p>
</div>

while continue_crawl(search_history, target_url):
continue_crawl(['https://en.wikipedia.org/wiki/Floating_point'], 'https://en.wikipedia.org/wiki/Philosophy')

def continue_crawl(search_history, target_url, max_steps=25):
    """Decide whether the crawl should follow another link.

    search_history: non-empty list of the URLs visited so far, oldest first.
    target_url: URL at which the crawl is considered successful.
    max_steps: history length beyond which the crawl is abandoned.

    Returns False (after printing the reason) when the most recent URL is
    the target, when the history is longer than ``max_steps``, or when the
    most recent URL was already visited earlier; otherwise returns True.
    """
    if search_history[-1] == target_url:
        print("We've found the target article!")
        return False
    elif len(search_history) > max_steps:
        print("The search has gone on suspiciously long, aborting search!")
        return False
    elif search_history[-1] in search_history[:-1]:
        # Revisiting a page means the chain of first links has looped.
        print("We've arrived at an article we've already seen, aborting search!")
        return False
    else:
        return True
import time
import urllib

import bs4
import requests

start_url = "https://en.wikipedia.org/wiki/Special:Random"
target_url = "https://en.wikipedia.org/wiki/Philosophy"

def find_first_link(url):
    """Return the absolute URL of the first link in an article's body text.

    Downloads ``url``, locates the ``mw-parser-output`` element inside the
    element with id ``mw-content-text``, and scans that element's
    direct-child paragraphs in document order for a direct-child ``<a>`` tag
    that carries an ``href``.

    Returns None when the page lacks that container or when no paragraph
    holds such a link.
    """
    # Imported locally: a bare ``import urllib`` does not guarantee that the
    # ``urllib.parse`` submodule is loaded.
    from urllib.parse import urljoin

    response = requests.get(url)
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    content_text = soup.find(id="mw-content-text")
    if content_text is None:
        return None
    content_div = content_text.find(class_="mw-parser-output")
    if content_div is None:
        return None

    for element in content_div.find_all("p", recursive=False):
        anchor = element.find("a", recursive=False)
        if anchor is None:
            continue
        article_link = anchor.get('href')
        if article_link:
            # Stop at the FIRST link. The href may be relative, so resolve it
            # against the URL of the page that was actually fetched.
            return urljoin(response.url, article_link)

    return None

def continue_crawl(search_history, target_url, max_steps=25):
    """Report whether the crawl should keep following links.

    Prints the reason and returns False when the latest URL is the target,
    when the history has grown past ``max_steps`` entries, or when the
    latest URL already appears earlier in the history. Returns True in
    every other case.
    """
    latest = search_history[-1]

    if latest == target_url:
        print("We've found the target article!")
        return False

    if len(search_history) > max_steps:
        print("The search has gone on suspiciously long, aborting search!")
        return False

    if latest in search_history[:-1]:
        print("We've arrived at an article we've already seen, aborting search!")
        return False

    return True

# Crawl driver: starting from start_url, repeatedly follow the link returned
# by find_first_link until continue_crawl says to stop.
article_chain = [start_url]

while continue_crawl(article_chain, target_url):
	# Show the page about to be fetched.
	print(article_chain[-1])

	first_link = find_first_link(article_chain[-1])
	if not first_link:
		print("We've arrived at an article with no links, aborting search!")
		break

	article_chain.append(first_link)

	# Pause between iterations so requests are spaced two seconds apart.
	time.sleep(2)

Reading from a File

# Form 1: open() returns a file object that stays open until f.close() is called.
f = open('/my_path/my_file.txt', 'r')
# Form 2: a with block closes the file automatically when the block is left.
with open('/my_path/my_file.txt', 'r') as f:
	file_data = f.read()

# Read camelot.txt into a list, one entry per line, with surrounding
# whitespace (including the trailing newline) removed from each entry.
with open("camelot.txt") as f:
    camelot_lines = [line.strip() for line in f]

print(camelot_lines)
def create_cast_list(filename):
    """Return the text before the first comma of every line in ``filename``.

    Lines that contain no comma are returned whole, including their line
    ending.
    """
    with open(filename) as cast_file:
        return [record.split(',')[0] for record in cast_file]

The Python Standard Library is organised into parts called modules. Many modules are simply Python files, like the Python scripts you’ve already used and written. In order to be able to use the code contained in a module we must import it, either in the interactive interpreter or in a Python script of our own.

The syntax for importing a module is simply import package_name.

>>> import math
>>> print(math.factorial(3))
6

from module_name import object_name
e.g.
from collections import defaultdict

>>> import multiprocessing as mp
>>> mp.cpu_count()
4

import an individual item from a module and give it a different name
from module_name import object_name as different_name
from csv import reader as csvreader

Python Standard Library
csv, collections, random, string, re, math, os, os.path, sys, json

import random

word_file = "words.txt"
word_list = []

# Keep only the words that are 4 to 7 characters long, lower-cased.
with open(word_file, 'r') as words:
    for line in words:
        word = line.strip().lower()
        if 3 < len(word) < 8:
            word_list.append(word)


# NOTE(review): the random module is not suitable for security-sensitive
# values; the standard-library secrets module is intended for that purpose.
def generate_password():
    """Return three independently chosen words joined together (repeats possible)."""
    return random.choice(word_list) + random.choice(word_list) + random.choice(word_list)


# Alternative solution. When both definitions live in the same module this
# one replaces the one above.
def generate_password():
    """Return three distinct entries of word_list joined together."""
    return str().join(random.sample(word_list, 3))

Tuple

>>> AngkorWat = (13.4125, 103.866667)
>>> print(type(AngkorWat))
<class 'tuple'>
>>> print("Angkor wat is at latitude: {}".format(AngkorWat[0]))
Angkor wat is at latitude: 13.4125
>>> print("Angkor wat is at longitude: {}".format(AngkorWat[1]))
Angkor wat is at longitude: 103.866667
>>> dimensions = 52, 40, 100
>>> length, width, height = dimensions
>>> print("the dimensions are {}x{}x{}".format(length, width, height))
the dimensions are 52x40x100
# Tuples are hashable, so (latitude, longitude) pairs can serve as dict keys.
world_heritage_locations = {
    (13.4125, 103.866667): "Angkor Wat",
    (25.73333, 32.6): "Ancient Thebes",
    (30.330556, 35.4433330): "Petra",
    (-13.116667, -72.583333): "Machu Picchu",
}
def box(width, height, symbol):
    """Print a hollow rectangle outlined with ``symbol``.

    width: number of symbols in the top and bottom rows.
    height: total number of rows; the top and bottom rows are always
        printed, with ``height - 2`` hollow rows between them.
    symbol: string used to draw the outline.
    """
    print(symbol * width)

    for _ in range(height-2):
        print(symbol + " " * (width-2) + symbol)

    # The bottom edge repeats the symbol, like the top edge; adding the
    # integer width to the string would raise TypeError.
    print(symbol * width)
def print_list(l, numbered, bullet_character):
    """Print every element of ``l`` on its own line.

    When ``numbered`` is truthy each line is prefixed with its 1-based
    position and a colon; otherwise it is prefixed with ``bullet_character``.
    """
    for position, item in enumerate(l, start=1):
        if numbered:
            prefix = "{}:".format(position)
        else:
            prefix = "{}".format(bullet_character)
        print("{} {}".format(prefix, item))

def word_count(document, search_term):
    """Count how many times search_term appears in document.

    The document is split on whitespace and only whole, exactly matching
    words are counted.
    """
    return document.split().count(search_term)

def nearest_square(limit):
    """Find the largest square number smaller than limit.

    Returns 0 when no positive square is smaller than ``limit``.
    """
    answer = 0
    # Advance while the NEXT square still stays strictly below the limit.
    while (answer+1)**2 < limit:
        answer += 1
    return answer**2

Dictionaries

Rather than storing single objects like lists and sets do, dictionaries store pairs of elements: keys and values.

elements = {'hydrogen': 1, 'helium': 2, 'carbon': 6}
>>> print(elements['carbon'])
6
>>> elements['lithium'] = 3
>>> print(elements['lithium'])
3
populations = {'Shanghai':17.8, 'Istanbul':13.3, 'Karachi':13.0, 'Mumbai':12.5}
# The in operator on a dictionary tests whether the key is present.
print("That's a real element!" if 'mithril' in elements else "There's no such element")
>>> elements.get('dilithium')
>>> elements['dilithium']

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
    elements['dilithium']
KeyError: 'dilithium'
>>> elements.get('kryptonite', 'There\'s no such element!')
"There's no such element!"
# A set literal avoids building a throwaway list first. Sets are unordered,
# so the colors may print in any order.
colors = {'Phthalo Blue', 'Indian Yellow', 'Sap Green'}
for color in colors:
    print(color)
>>> elements = {'hydrogen': {'number':1, 'weight':1.00794, 'symbol':'H'},
	    'helium':{'number':2, 'weight':4.002602, 'symbol':'He'}}
>>> print(elements['helium'])
{'symbol': 'He', 'number': 2, 'weight': 4.002602}
>>> print(elements.get('unobtainium', 'There\'s no such element!'))
There's no such element!
>>> print(elements['helium']['weight'])
4.002602

Reorganizing code

Factoring: decomposing a complex problem into simpler parts.
Refactoring: restructuring existing code.

Reading the code, is it clear what each piece does? How could it be made easier to follow?
If you needed to change some part of the functionality, would that be easy? Would you have to change the same thing in several places?
If you break down what the function does into steps, how many steps are there? It’s best to have each function doing only one thing.
Is there unnecessary repetition? Does every piece of code get used? Could anything be more succinct whilst still being readable? This is called the DRY (Don’t Repeat Yourself) principle.

def check_answers(my_answers, answers):
    """Grade a quiz submission and return a pass/fail message.

    my_answers: submitted answers, position-aligned with ``answers``.
    answers: the answer key; must contain at least one entry.

    A submission passes when more than 70% of the key's entries are matched.
    Questions without a submitted answer count as incorrect.

    Returns a message string stating the outcome and the score.
    Raises ValueError if ``answers`` is empty.
    """
    total = len(answers)
    if total == 0:
        raise ValueError("answers must contain at least one entry")

    # zip() stops at the shorter sequence, so unanswered questions never add
    # to the number of correct answers.
    count_correct = sum(
        1 for mine, expected in zip(my_answers, answers) if mine == expected
    )

    if count_correct / total > 0.7:
        return "Congratulations, you passed the test! you scored " + str(count_correct) + " out of " + str(total) + "."
    return "Unfortunately, you did not pass. You scored " + str(count_correct) + " out of " + str(total) + "."
>>> len(countries)
785
>>> countries[:5]
['Angola', 'Maldives', 'India', 'United States', 'India']
def remove_duplicates(source):
    """Return a new list holding the first occurrence of each element of ``source``.

    The order of first appearance is kept and ``source`` is not modified.
    """
    unique_items = []

    for candidate in source:
        if candidate in unique_items:
            continue
        unique_items.append(candidate)

    return unique_items
# Building a set from the list keeps one copy of each distinct entry.
country_set = set(countries)
len(country_set)

# add() inserts an element into the set.
country_set.add("Florin")

squares = set()


def nearest_square(limit):
    """Return the largest square number smaller than limit (0 if there is none)."""
    answer = 0
    while (answer+1)**2 < limit:
        answer += 1
    return answer**2


# Populate squares with every square number below 2000.
n = 1
while n**2 < 2000:
    squares.add(n**2)
    n += 1

while loop

card_deck = [4, 11, 8, 5, 13, 2, 8, 10]
hand = []

# Draw from the end of the deck until the hand totals more than 17. The
# extra card_deck check stops the loop cleanly if the deck runs out first,
# instead of raising IndexError from pop() on an empty list.
while card_deck and sum(hand) <= 17:
    hand.append(card_deck.pop())

print(hand)
manifest = [["bananas", 15], ["mattresses", 34], ["dog kennels", 42], ["machine that goes ping!", 120], ["tea chests", 10], ["cheeses", 0]]

cargo_weight = 0
cargo_hold = []

# NOTE: the weight is checked BEFORE each item is added, so the item that
# takes the total past 100 is still loaded; the loop stops on the next pass.
for cargo in manifest:
    if cargo_weight >= 100:
        break
    else:
        cargo_hold.append(cargo[0])
        cargo_weight += cargo[1]
cargo_weight = 0
cargo_hold = []

# Same loading loop, instrumented with debug output at every step.
for cargo in manifest:
    print("debug: the weight is currently: {}".format(cargo_weight))

    # Guard clause: stop as soon as the running total has reached 100.
    if cargo_weight >= 100:
        print("debug: breaking loop now!")
        break

    print("debug: adding item: {}".format(cargo[0]))
    print("debug: with weight: {}".format(cargo[1]))
    cargo_hold.append(cargo[0])
    cargo_weight += cargo[1]

For Loops

>>> names = ['charlotte hippopotamus turner', 'oliver st. john-mollusc', 'nigel incubator-jones', 'philip diplodocus mallory']
>>> for name in names:
	print(name.title())

	
Charlotte Hippopotamus Turner
Oliver St. John-Mollusc
Nigel Incubator-Jones
Philip Diplodocus Mallory
def list_sum(input_lists):
    """Return the sum of the numbers in ``input_lists`` (0 for an empty list)."""
    # Use the builtin sum instead of a local variable named sum, which
    # shadowed the builtin inside this function.
    return sum(input_lists)


test1 = list_sum([1, 2, 3])
print("expected result: 6, actual result: {}".format(test1))

test2 = list_sum([-1, 0, 1])
print("expected result: 0, actual result: {}".format(test2))
>>> names = ['charlotte hippopotamus turner', 'oliver st. john-mollusc', 'nigel incubator-jones', 'philip diplodocus mallory']
>>> capitalized_name = []
>>> for name in names:
	capitalized_name.append(name.title())

	
>>> print(capitalized_name)
['Charlotte Hippopotamus Turner', 'Oliver St. John-Mollusc', 'Nigel Incubator-Jones', 'Philip Diplodocus Mallory']

>>> for index in range(len(names)):
	names[index] = names[index].title()
>>> for i in range(3):
	print("Camelot!")

	
Camelot!
Camelot!
Camelot!
>>> print("It's only a model.")
It's only a model.
# Excerpt: prints height-2 hollow rows; width and height must already be
# defined by the surrounding code.
for _ in range(height-2):
	print("*" + " " * (width-2) + "*")
def starbox(width, height):
    """Print a hollow rectangle of asterisks.

    The top and bottom rows are ``width`` asterisks each; between them come
    ``height - 2`` rows that have an asterisk at each end.
    """
    solid_row = "*" * width
    hollow_row = "*" + " " * (width-2) + "*"

    print(solid_row)
    for _ in range(height-2):
        print(hollow_row)
    print(solid_row)

Joining Lists

>>> nautical_directions = "\n".join(["fore","aft","starboard","port"])
>>> print(nautical_directions)
fore
aft
starboard
port

>>> names = ["Garcia", "O'Kelly", "Davis"]
>>> "-".join(names)
"Garcia-O'Kelly-Davis"

Note that join will trigger an error if we try to join anything other than strings.

>>> python_vareties.append('Blood Python')
>>> print(python_vareties)
['Burmese Python', 'African Rock Python', 'Ball Python', 'Reticulated Python', 'Angolan Python', 'Blood Python']
def median(numbers):
    """Return the median of ``numbers``.

    For an odd number of values the middle value of the sorted data is
    returned; for an even number, the mean of the two middle values. The
    input list is left unmodified.

    Raises ValueError if ``numbers`` is empty.
    """
    # Work on a sorted copy so the caller's list is not reordered.
    ordered = sorted(numbers)
    if not ordered:
        raise ValueError("median() requires at least one number")

    middle_index = len(ordered) // 2
    if len(ordered) % 2:
        return ordered[middle_index]
    return (ordered[middle_index] + ordered[middle_index - 1]) / 2