Population density

def ensure_float(v):
	"""Convert v to a float when is_number accepts it.

	Returns float(v) if is_number(v) is truthy; otherwise returns None.
	"""
	if not is_number(v):
		return None
	return float(v)

def audit_population_density(input_file, tolerance=10):
	"""Report rows whose stated density disagrees with population / area.

	Args:
		input_file: iterable of mappings providing the keys
			'populationTotal', 'areaLand', 'populationDensity' and 'name'.
		tolerance: largest absolute difference between the computed and the
			stated density that is still accepted (defaults to 10).

	Rows in which any of the three figures is not numeric (ensure_float
	gave None) or whose area is zero are skipped. For every other row whose
	difference exceeds the tolerance a message naming the row is printed.
	"""
	for row in input_file:
		population = ensure_float(row['populationTotal'])
		area = ensure_float(row['areaLand'])
		population_density = ensure_float(row['populationDensity'])
		# Compare against None explicitly: a plain truthiness test would also
		# skip rows holding a legitimate 0.0 and hide their inconsistencies.
		if population is None or area is None or population_density is None:
			continue
		if area == 0:
			# No density can be derived from a zero area.
			continue
		calculated_density = population / area
		# abs() gives the same result as math.fabs() here and needs no import.
		if abs(calculated_density - population_density) > tolerance:
			# A single-argument print() behaves the same on Python 2 and 3.
			print("%s %s" % ("Possibly bad population density for ", row['name']))

if __name__ == '__main__':
	# The context manager closes cities.csv once the audit has consumed it,
	# instead of leaving the handle open for the life of the process.
	with open("cities.csv") as cities_file:
		input_file = csv.DictReader(cities_file)
		skip_lines(input_file, 3)
		audit_population_density(input_file)

Using a blueprint

import xml.etree.cElementTree as ET
from collections import defaultdict
import re

# Opened for reading at import time; audit() streams it through ET.iterparse.
# NOTE(review): nothing in the visible code closes this handle.
osm_file = open("chicago_abbrev.osm", "r")

# Matches the trailing run of non-whitespace characters of a string. The
# pattern contains no letters, so re.IGNORECASE does not change what it matches.
street_type_re = re.compile(r'\S+\.?$', re.IGNORECASE)
# Occurrence count per matched street type; unseen keys start at 0.
street_types = defaultdict(int)

def audit_street_type(street_types, street_name):
	"""Count the street type found at the end of street_name.

	The text matched by street_type_re in street_name is used as a key in
	street_types and its counter is incremented by one. Nothing happens
	when the pattern does not match.
	"""
	found = street_type_re.search(street_name)
	if not found:
		return
	street_types[found.group()] += 1

def print_sorted_dict(d):
	"""Print every entry of d as "key: value", ordered by key ignoring case.

	Args:
		d: mapping whose keys are strings (they are ordered by key.lower()).
	"""
	# The loop previously had no body, which is a syntax error; emitting one
	# line per entry is what makes the sorted traversal useful.
	for k in sorted(d, key=lambda s: s.lower()):
		# A single-argument print() behaves the same on Python 2 and 3.
		print("%s: %s" % (k, d[k]))

def is_street_name(elem):
	"""Tell whether elem is a "tag" element whose 'k' attribute is "addr:street".

	The 'k' attribute is only looked up for elements whose tag is "tag".
	"""
	if elem.tag != "tag":
		return False
	return elem.attrib['k'] == "addr:street"

def audit():
	"""Tally the street types found in osm_file and print the tally.

	Every element produced by ET.iterparse(osm_file) that is_street_name
	accepts has its 'v' attribute passed to audit_street_type, which updates
	street_types. The accumulated counts are then printed with
	print_sorted_dict.
	"""
	for _, element in ET.iterparse(osm_file):
		if not is_street_name(element):
			continue
		audit_street_type(street_types, element.attrib['v'])
	print_sorted_dict(street_types)

# Run the street-type audit only when executed as a script, not on import.
if __name__ == '__main__':
	audit()

Scraping solution

from bs4 import BeautifulSoup
import requests

# The initial GET and the form POST both target this page.
DATA_URL = "http://www.transtats.bts.gov/Data_Elements.aspx?Data=2"

# One session is shared by both requests.
s = requests.Session()

r = s.get(DATA_URL)
soup = BeautifulSoup(r.text)
# Hidden form fields read from the fetched page and sent back with the POST.
viewstate_element = soup.find(id="__VIEWSTATE")
viewstate = viewstate_element["value"]
eventvalidation_element = soup.find(id="__EVENTVALIDATION")
eventvalidation = eventvalidation_element["value"]

r = s.post(DATA_URL,
	data={'AirportList' : "BOS",
		'CarrierList' : "VX",
		'Submit' : "Submit",
		'__EVENTTARGET' : "",
		'__EVENTVALIDATION' : eventvalidation,
		'__VIEWSTATE' : viewstate})

# The context manager flushes and closes the output file.
with open("virgin_and_logan_airport.html", "w") as f:
	f.write(r.text)

Parsing XML

import xml.etree.ElementTree as ET
import pprint

tree = ET.parse('exampleResearchArticle.xml')
root = tree.getroot()

# Single-argument print() calls produce the same output on Python 2 and
# Python 3, whereas the bare print statement is a syntax error on Python 3.
print("\nChildren of root:")
for child in root:
	print(child.tag)
import xml.etree.ElementTree as ET
import pprint

tree = ET.parse('exampleResearchArticle.xml')
root = tree.getroot()

title = root.find('./fm/bibl/title')
# find() returns None when the path matches nothing, and a child's .text is
# None when it carries no text; both are treated as empty instead of letting
# the concatenation raise TypeError.
title_text = ""
if title is not None:
	title_text = "".join(p.text or "" for p in title)
# Single-argument print() calls behave the same on Python 2 and Python 3.
print("\nTitle:\n" + title_text)

print("\nAuthor email addresses:")
for a in root.findall('./fm/bibl/aug/au'):
	email = a.find('email')
	if email is not None:
		print(email.text)

XLRD

#!/usr/bin/env python

from zipfile import ZipFile

import xlrd
# Workbook path; open_zip() appends ".zip" to it to name the archive it extracts.
datafile = "2013_ERCOT_Hourly_Load_Data.xls"

def open_zip(datafile):
	"""Extract the archive named "<datafile>.zip" into the current directory.

	Args:
		datafile: archive path without its ".zip" suffix.
	"""
	archive_name = '{0}.zip'.format(datafile)
	with ZipFile(archive_name, 'r') as archive:
		archive.extractall()

def parse_file(datafile):
	"""Summarise column 1 of the first worksheet of an Excel workbook.

	Args:
		datafile: path of the workbook to open with xlrd.

	Returns:
		A dict with the keys
			'maxtime':  date tuple read from column 0 of the row holding the
				largest column-1 value,
			'maxvalue': that largest value,
			'mintime':  date tuple read from column 0 of the row holding the
				smallest column-1 value,
			'minvalue': that smallest value,
			'avgcoast': arithmetic mean of the column-1 values (presumably
				the COAST column -- confirm against the workbook).

	Raises:
		ValueError: when the sheet has no rows below row 0, because max()
			is then called on an empty list.
	"""
	workbook = xlrd.open_workbook(datafile)
	sheet = workbook.sheet_by_index(0)

	# Row 0 is left out (start_rowx=1); presumably it is the header row.
	cv = sheet.col_values(1, start_rowx=1, end_rowx=None)

	maxval = max(cv)
	minval = min(cv)

	# cv starts at sheet row 1, so +1 turns a list index into a sheet row.
	maxpos = cv.index(maxval) + 1
	minpos = cv.index(minval) + 1

	# Use the workbook's own date mode rather than assuming the 1900 system.
	maxtime = xlrd.xldate_as_tuple(sheet.cell_value(maxpos, 0), workbook.datemode)
	mintime = xlrd.xldate_as_tuple(sheet.cell_value(minpos, 0), workbook.datemode)

	# Return the computed figures; an all-zero placeholder was returned before.
	return {
		'maxtime': maxtime,
		'maxvalue': maxval,
		'mintime': mintime,
		'minvalue': minval,
		'avgcoast': sum(cv) / float(len(cv)),
	}

def test():
	"""Unpack the archive, parse the workbook and check the reported maximum."""
	open_zip(datafile)
	summary = parse_file(datafile)

	expected_maxtime = (2013, 8, 13, 17, 0, 0)
	expected_maxvalue = round(18779.02551, 10)

	assert summary['maxtime'] == expected_maxtime
	assert round(summary['maxvalue'], 10) == expected_maxvalue

Reading Excel file

import xlrd

# Path of the workbook handed to parse_file(). A stray "." after the string
# literal made this line a syntax error.
datafile = "2013_ERCOT_Hourly_Load_Data.xls"

def parse_file(datafile):
	"""Print sample cells from the first worksheet of an Excel workbook.

	Args:
		datafile: path of the workbook to open with xlrd.

	The sheet is read into a list of rows, the value at data[3][2] is
	printed, and then every cell of sheet row 50 is printed, one per line.
	Nothing is printed for row 50 when the sheet has 50 rows or fewer.

	Raises:
		IndexError: when the sheet has no cell at row 3, column 2.
	"""
	workbook = xlrd.open_workbook(datafile)
	sheet = workbook.sheet_by_index(0)

	data = [
		[sheet.cell_value(r, col) for col in range(sheet.ncols)]
		for r in range(sheet.nrows)
	]

	# Single-argument print() calls behave the same on Python 2 and Python 3.
	print("\nList Comprehension")
	print("data[3][2]: %s" % (data[3][2],))

	print("\nCells in a nested loop:")
	# Only row 50 is ever printed, so address it directly instead of
	# visiting every cell of the sheet to find it.
	if sheet.nrows > 50:
		for col in range(sheet.ncols):
			print(sheet.cell_value(50, col))
import xlrd

# Path of the workbook handed to parse_file(). A stray "." after the string
# literal made this line a syntax error.
datafile = "2013_ERCOT_Hourly_Load_Data.xls"

def parse_file(datafile):
	"""Report the extremes and the mean of column 1 of the first worksheet.

	Args:
		datafile: path of the workbook to open with xlrd.

	Returns:
		A dict with the keys 'maxtime' and 'mintime' (date tuples taken from
		column 0 of the rows holding the largest and smallest column-1
		values), 'maxvalue' and 'minvalue' (those two values) and 'avgcoast'
		(the arithmetic mean of the column-1 values; presumably the COAST
		column -- confirm against the workbook).

	Raises:
		ValueError: when the sheet has no rows below row 0, because max()
			is then called on an empty list.
	"""
	workbook = xlrd.open_workbook(datafile)
	sheet = workbook.sheet_by_index(0)

	# Row 0 is left out (start_rowx=1); presumably it is the header row.
	values = sheet.col_values(1, start_rowx=1, end_rowx=None)

	maxvalue = max(values)
	minvalue = min(values)

	# values starts at sheet row 1, so +1 turns a list index into a sheet row.
	maxrow = values.index(maxvalue) + 1
	minrow = values.index(minvalue) + 1

	# The placeholder zeros are replaced by the figures computed above, which
	# is what the module-level assertions on 'maxtime'/'maxvalue' check.
	data = {
		'maxtime': xlrd.xldate_as_tuple(sheet.cell_value(maxrow, 0), workbook.datemode),
		'maxvalue': maxvalue,
		'mintime': xlrd.xldate_as_tuple(sheet.cell_value(minrow, 0), workbook.datemode),
		'minvalue': minvalue,
		'avgcoast': sum(values) / float(len(values)),
	}
	return data

data = parse_file(datafile)

assert data['maxtime'] == (2013, 8, 13, 17, 0, 0)
# The expected value is the float 18779.02551. Written with a comma in place
# of the decimal point it was split into two arguments, so round() received
# three arguments (and the line does not even parse on Python 3).
assert round(data['maxvalue'], 10) == round(18779.02551, 10)

Using CSV Module

import os
import pprint
import csv

# Directory holding the data file; with the empty string os.path.join()
# returns DATAFILE unchanged.
DATADIR = ""
# Name of the CSV file that is joined with DATADIR and passed to parse_csv().
DATAFILE = "beatles-diskography.csv"

def parse_csv(datafile):
	"""Read a CSV file into a list with one mapping per data row.

	Args:
		datafile: path of the CSV file; its first row supplies the keys.

	Returns:
		A list of the rows produced by csv.DictReader, in file order.
	"""
	import sys

	# The csv module wants a binary-mode file on Python 2 and a text-mode
	# file opened with newline='' on Python 3.
	if sys.version_info[0] >= 3:
		sd = open(datafile, 'r', newline='')
	else:
		sd = open(datafile, 'rb')
	with sd:
		return list(csv.DictReader(sd))

if __name__ == '__main__':
	datafile = os.path.join(DATADIR, DATAFILE)
	# A call to the misspelled name parse_scv used to sit here and raised
	# NameError. It is dropped rather than renamed because its result was
	# discarded and the file is parsed on the next line anyway.
	d = parse_csv(datafile)
	pprint.pprint(d)

Parsing CSV Files

import os

# Directory holding the data file; with the empty string os.path.join()
# returns DATAFILE unchanged.
DATADIR = ""
# Name of the CSV file that test() joins with DATADIR and passes to parse_file().
DATAFILE = "beatles-diskography.csv"


def parse_file(datafile, max_rows=10):
    """Read the leading data rows of a CSV file into a list of dicts.

    Args:
        datafile: path of the CSV file; its first row holds the column names.
        max_rows: number of data rows to read (defaults to 10); None reads
            every row.

    Returns:
        A list with one dict per data row, mapping each stripped column name
        to the stripped field value. Fields beyond the last column name are
        ignored.
    """
    import csv
    from itertools import islice

    data = []
    with open(datafile, "r") as f:
        # csv.reader keeps quoted fields such as "Parlophone(UK), Capitol(US)"
        # in one piece; splitting on "," tore them apart and then indexed
        # past the end of the header.
        reader = csv.reader(f)
        header = [name.strip() for name in next(reader, [])]
        for fields in islice(reader, max_rows):
            data.append({name: value.strip() for name, value in zip(header, fields)})

    return data


def test():
    """Check parse_file against the expected first and tenth records."""
    path = os.path.join(DATADIR, DATAFILE)
    records = parse_file(path)

    expected_first = {
        'Title': 'Please Please Me',
        'UK Chart Position': '1',
        'Label': 'Parlophone(UK)',
        'Released': '22 March 1963',
        'US Chart Position': '-',
        'RIAA Certification': 'Platinum',
        'BPI Certification': 'Gold',
    }
    expected_tenth = {
        'Title': '',
        'UK Chart Position': '1',
        'Label': 'Parlophone(UK)',
        'Released': '10 July 1964',
        'US Chart Position': '-',
        'RIAA Certification': '',
        'BPI Certification': 'Gold',
    }

    assert records[0] == expected_first
    assert records[9] == expected_tenth
Title,Released,Label,UK Chart Position,US Chart Position,BPI Certification,RIAA Certification
Please Please Me,22 March 1963,Parlophone(UK),1,-,Gold,Platinum
With the Beatles,22 November 1963,Parlophone(UK),1,-,Platinum,Gold
Beatlemania! With the Beatles,25 November 1963,Capitol(CAN),-,-,,
Introducing... The Beatles,10 January 1964,Vee-Jay(US),-,2,,
Meet the Beatles!,20 January 1964,Capitol(US),-,1,,5xPlatinum
Twist and Shout,3 February 1964,Capitol(CAN),-,-,,
The Beatles' Second Album,10 April 1964,Capitol(US),-,1,,2xPlatinum
The Beatles' Long Tall Sally,11 May 1964,Capitol(CAN),-,-,,
A Hard Day's Night,26 June 1964,United Artists(US)c,-,1,,4xPlatinum
,10 July 1964,Parlophone(UK),1,-,Gold,
Something New,20 July 1964,Capitol(US),-,2,,Platinum
Beatles for Sale,4 December 1964,Parlophone(UK),1,-,Gold,Platinum
Beatles '65,15 December 1964,Capitol(US),-,1,,3xPlatinum
Beatles VI,14 June 1965,"Parlophone(NZ), Capitol(US)",-,1,,Platinum
Help!,6 August 1965,Parlophone(UK),1,-,Platinum,
,13 August 1965,Capitol(US) c,-,1,,3xPlatinum
Rubber Soul,3 December 1965,Parlophone(UK),1,-,Platinum,
,6 December 1965,Capitol(US) c ,-,1,,6xPlatinum
Yesterday and Today,15 June 1966,Capitol(US),-,1,,2xPlatinum
Revolver,5 August 1966,Parlophone(UK),1,-,Platinum,
,8 August 1966,Capitol(US) c,-,1,,5xPlatinum
Sgt. Pepper's Lonely Hearts Club Band,1 June 1967,"Parlophone(UK), Capitol(US)",1,1,3xPlatinum,11xPlatinum
Magical Mystery Tour,27 November 1967,"Parlophone(UK), Capitol(US)",31[D],1,Platinum,6xPlatinum
The Beatles,22 November 1968,"Apple(UK), Capitol(US)",1,1,Platinum,19xPlatinum
Yellow Submarine,13 January 1969,"Apple(UK), Capitol(US)",3,2,Silver,Platinum
Abbey Road,26 September 1969,"Apple(UK), Capitol(US)",1,1,2xPlatinum,12xPlatinum
Let It Be,8 May 1970,"Apple(UK),United Artists(US)",1,1,Gold,4xPlatinum

reduced satisfaction

def set_preprocessing(num_variables, clauses):
	"""Derive a tentative assignment for variables that occur exactly once.

	Args:
		num_variables: number of variables; assumes they are numbered
			1..num_variables -- TODO confirm against the caller.
		clauses: sequence of clauses, each a sequence of non-zero ints whose
			sign gives the polarity of the variable abs(value).

	A variable that occurs exactly once across all clauses is tentatively
	set to 1 when it occurs positively and to 0 when it occurs negated.

	NOTE(review): the function is unfinished. The handling of single-literal
	clauses stops after reading the literal, the tentative assignment is
	never returned, and nothing sets rules_applicable back to True, so the
	outer loop body runs once. The return value is None.

	Raises:
		AssertionError: when a clause is empty.
	"""
	rules_applicable = True
	# Index 0 is unused; the lists are indexed by variable number, so they
	# need num_variables + 1 slots. Multiplying by 1 left index
	# num_variables out of range.
	temp_assignment = [None] * (num_variables + 1)
	while rules_applicable:
		rules_applicable = False

		variable_counter = [0] * (num_variables + 1)
		var_setting = [None] * (num_variables + 1)

		# Count every variable's occurrences and remember the polarity of
		# the occurrence seen last.
		for clause in clauses:
			for var in clause:
				avar = abs(var)
				variable_counter[avar] += 1
				var_setting[avar] = (1 if var > 0 else 0)

		# A variable seen exactly once takes the polarity it was seen with,
		# unless it already has a tentative value.
		for i, var in enumerate(variable_counter):
			if var != 1:
				continue
			if temp_assignment[i] is not None:
				continue
			temp_assignment[i] = var_setting[i]

		for clause in clauses:
			assert len(clause) != 0
			if len(clause) > 1:
				continue
			# TODO: single-literal clause found; nothing is done with it yet.
			var = clause[0]
			avar = abs(var)

cover by tree

def vertex_cover_tree(input_graph):
	"""Start recursive_vertex_cover with every vertex still unassigned.

	Builds a list holding None once per entry of input_graph and returns
	whatever recursive_vertex_cover returns for that graph and list.
	"""
	unassigned = [None for _ in range(len(input_graph))]
	return recursive_vertex_cover(input_graph, unassigned)

def recursive_vertex_cover(input_graph, assignment):

	n = len(input_graph)
	v = -1

	for i in range(n):
		if assignment[i] == None:
			v = i
		for j in range(i, n):
			if input_graph[i][j] == 1:
				if assignment[i] == 0 and assignment[j] == 0:
					return float("inf")
	if v == -1:
		size = 0
		for i in range(n):
			if assignment[i] == 1:
				size += 1
		return size

	assignment[v] = 0
	size_v_0 = recursive_vertext_cover