# Audit the street-type suffixes found in an OpenStreetMap extract.
#
# Every <tag k="addr:street" v="..."> element contributes the last
# whitespace-delimited token of its value (e.g. "Ave", "St.") to a counter,
# and the counts are printed sorted case-insensitively.
try:
    import xml.etree.cElementTree as ET
except ImportError:
    # The C-accelerated alias no longer exists on newer Python 3 releases;
    # the plain module is the drop-in replacement.
    import xml.etree.ElementTree as ET
from collections import defaultdict
import re

# Path of the OSM extract. ET.iterparse accepts a filename and closes the
# file it opens, so no handle is left open at import time.
osm_file = "chicago_abbrev.osm"

# Last run of non-whitespace characters in the street name.
street_type_re = re.compile(r'\S+\.?$', re.IGNORECASE)
street_types = defaultdict(int)


def audit_street_type(street_types, street_name):
    """Increment the count of the trailing token of *street_name*.

    street_types -- mapping from street type to count; must supply a default
                    of 0 for unseen keys (e.g. ``defaultdict(int)``).
    street_name  -- the full street name string.

    Names in which the pattern finds nothing (such as the empty string)
    are ignored.
    """
    m = street_type_re.search(street_name)
    if m:
        street_types[m.group()] += 1


def print_sorted_dict(d):
    """Print ``key: count`` for every entry of *d*, keys sorted ignoring case."""
    # NOTE(review): the loop body was missing in the source; printing each
    # key with its count is the restored behaviour -- confirm the format.
    for k in sorted(d, key=lambda s: s.lower()):
        print("%s: %d" % (k, d[k]))


def is_street_name(elem):
    """Return True when *elem* is a ``<tag>`` whose ``k`` is ``addr:street``."""
    # .get() so that an element without a "k" attribute is simply "not a street".
    return (elem.tag == "tag") and (elem.attrib.get('k') == "addr:street")


def audit():
    """Count the street types in ``osm_file`` and print the sorted tally."""
    for _event, elem in ET.iterparse(osm_file):
        if is_street_name(elem):
            audit_street_type(street_types, elem.attrib['v'])
    print_sorted_dict(street_types)


if __name__ == '__main__':
    audit()
Category: algorithm
Scraping solution
# Submit the carrier/airport form on the page below and save the response
# HTML to disk.
#
# The hidden __VIEWSTATE and __EVENTVALIDATION inputs are read from a first
# GET and echoed back in the POST, using the same session for both requests.
import requests  # was used below without ever being imported
from bs4 import BeautifulSoup

url = "http://www.transtats.bts.gov/Data_Elements.aspx?Data=2"

s = requests.Session()

r = s.get(url)
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(r.text, "html.parser")
viewstate_element = soup.find(id="__VIEWSTATE")
viewstate = viewstate_element["value"]
eventvalidation_element = soup.find(id="__EVENTVALIDATION")
eventvalidation = eventvalidation_element["value"]

r = s.post(url,
           data={'AirportList': "BOS",
                 'CarrierList': "VX",
                 'Submit': "Submit",
                 '__EVENTTARGET': "",
                 '__EVENTVALIDATION': eventvalidation,
                 '__VIEWSTATE': viewstate})

# The context manager guarantees the file is flushed and closed.
with open("virgin_and_logan_airport.html", "w") as f:
    f.write(r.text)
Parsing XML
# Print the tag of every direct child of the document root.
import xml.etree.ElementTree as ET
import pprint

tree = ET.parse('exampleResearchArticle.xml')
root = tree.getroot()

# Single-argument print(...) produces the same output on Python 2 and 3.
print("\nChildren of root:")
for child in root:
    print(child.tag)
# Print the article title and the e-mail address of every author that has one.
import xml.etree.ElementTree as ET
import pprint

tree = ET.parse('exampleResearchArticle.xml')
root = tree.getroot()

title = root.find('./fm/bibl/title')
# The title text is spread over the child elements of <title>. A child with
# no text (p.text is None) contributes nothing instead of raising, and a
# missing <title> yields an empty string.
title_text = ""
if title is not None:
    title_text = "".join(p.text or "" for p in title)
# One formatted string keeps the output identical on Python 2 and 3.
print("\nTitle:\n%s" % title_text)

print("\nAuthor email addresses:")
for a in root.findall('./fm/bibl/aug/au'):
    email = a.find('email')
    if email is not None:
        print(email.text)
XLRD
#!/usr/bin/env python
# Find the minimum, maximum and average of column 1 of the first worksheet,
# together with the timestamps (column 0) of the extreme values.
import xlrd
from zipfile import ZipFile  # the class is ZipFile; "zipfile" is the module

datafile = "2013_ERCOT_Hourly_Load_Data.xls"


def open_zip(datafile):
    """Extract ``<datafile>.zip`` into the current working directory."""
    with ZipFile('{0}.zip'.format(datafile), 'r') as myzip:
        myzip.extractall()


def parse_file(datafile):
    """Return summary statistics for column 1 of the first sheet.

    The returned dict has the keys ``maxtime``, ``maxvalue``, ``mintime``,
    ``minvalue`` and ``avgcoast``. The two times are the tuples produced by
    ``xlrd.xldate_as_tuple`` for the column-0 cell on the row of the
    maximum / minimum value.

    NOTE(review): column 1 is presumably the COAST load column (the result
    key is ``avgcoast``) -- confirm against the workbook.
    """
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)

    # Skip the first row (start_rowx=1), so list index i is sheet row i + 1.
    cv = sheet.col_values(1, start_rowx=1, end_rowx=None)

    maxval = max(cv)
    minval = min(cv)
    maxpos = cv.index(maxval) + 1
    minpos = cv.index(minval) + 1

    maxtime = sheet.cell_value(maxpos, 0)
    realtime = xlrd.xldate_as_tuple(maxtime, 0)
    mintime = sheet.cell_value(minpos, 0)
    realmintime = xlrd.xldate_as_tuple(mintime, 0)

    # Return the computed values; the source returned all-zero placeholders.
    data = {
        'maxtime': realtime,
        'maxvalue': maxval,
        'mintime': realmintime,
        'minvalue': minval,
        'avgcoast': sum(cv) / float(len(cv)),
    }
    return data


def test():
    """Unpack the archive and check the known maximum of the data set."""
    open_zip(datafile)
    data = parse_file(datafile)

    assert data['maxtime'] == (2013, 8, 13, 17, 0, 0)
    assert round(data['maxvalue'], 10) == round(18779.02551, 10)
Reading Excel file
# Demonstrates two ways of reading cells from the first worksheet with xlrd.
import xlrd

datafile = "2013_ERCOT_Hourly_Load_Data.xls"


def parse_file(datafile):
    """Print one cell from a fully materialised sheet, then every cell of row 50.

    Nothing is returned; the function only prints.
    """
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)

    # The whole sheet as a list of rows, each row a list of cell values.
    data = [[sheet.cell_value(r, col)
             for col in range(sheet.ncols)]
            for r in range(sheet.nrows)]

    print("\nList Comprehension")
    print("data[3][2]: %s" % data[3][2])

    print("\nCells in a nested loop:")
    # Only row 50 is printed, so go straight to it rather than scanning every
    # row; a sheet with fewer than 51 rows prints nothing, as before.
    if sheet.nrows > 50:
        for col in range(sheet.ncols):
            print(sheet.cell_value(50, col))
# Exercise: report the minimum, maximum and average of column 1 of the first
# worksheet and the timestamps (column 0) of the extreme values.
import xlrd

datafile = "2013_ERCOT_Hourly_Load_Data.xls"


def parse_file(datafile):
    """Return summary statistics for column 1 of the first sheet.

    The returned dict has the keys ``maxtime``, ``maxvalue``, ``mintime``,
    ``minvalue`` and ``avgcoast``; the times are ``xlrd.xldate_as_tuple``
    tuples taken from column 0.

    NOTE(review): the source returned all-zero placeholders, which can never
    satisfy the asserts below. Column 1 is assumed to be the COAST load
    column and row 0 a header row -- confirm against the workbook.
    """
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)

    # Skip the first row (start_rowx=1), so list index i is sheet row i + 1.
    values = sheet.col_values(1, start_rowx=1, end_rowx=None)
    maxval = max(values)
    minval = min(values)
    maxrow = values.index(maxval) + 1
    minrow = values.index(minval) + 1

    data = {
        'maxtime': xlrd.xldate_as_tuple(sheet.cell_value(maxrow, 0), 0),
        'maxvalue': maxval,
        'mintime': xlrd.xldate_as_tuple(sheet.cell_value(minrow, 0), 0),
        'minvalue': minval,
        'avgcoast': sum(values) / float(len(values)),
    }
    return data


data = parse_file(datafile)

assert data['maxtime'] == (2013, 8, 13, 17, 0, 0)
# The expected value is 18779.02551 (the source had a decimal comma).
assert round(data['maxvalue'], 10) == round(18779.02551, 10)
Using CSV Module
# Read a CSV file into a list of per-row dictionaries with the csv module.
import os
import pprint
import csv
import sys

DATADIR = ""
DATAFILE = "beatles-diskography.csv"


def parse_csv(datafile):
    """Return every data row of *datafile* as a mapping keyed by the header row.

    Quoted fields (including embedded commas) are handled by ``csv.DictReader``.
    """
    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] >= 3:
        sd = open(datafile, 'r', newline='')
    else:
        sd = open(datafile, 'rb')
    with sd:
        return list(csv.DictReader(sd))


if __name__ == '__main__':
    datafile = os.path.join(DATADIR, DATAFILE)
    d = parse_csv(datafile)
    pprint.pprint(d)
Parsing CSV Files
import os DATADIR = "" DATAFILE = "beatles-diskography.csv" def parse_file(datafile): data = [] with open(datafile, "r") as f: header = f.readline().split(",") counter = 0 for line in f: if counter == 10: break fields = line.split(",") entry = {} for i, value in enumerate(fields): entry[header[i].strip()] = value.strip() data.append(entry) counter += 1 return data def test(): # a simple test of your implemetation datafile = os.path.join(DATADIR, DATAFILE) d = parse_file(datafile) firstline = {'Title': 'Please Please Me', 'UK Chart Position': '1', 'Label': 'Parlophone(UK)', 'Released': '22 March 1963', 'US Chart Position': '-', 'RIAA Certification': 'Platinum', 'BPI Certification': 'Gold'} tenthline = {'Title': '', 'UK Chart Position': '1', 'Label': 'Parlophone(UK)', 'Released': '10 July 1964', 'US Chart Position': '-', 'RIAA Certification': '', 'BPI Certification': 'Gold'} assert d[0] == firstline assert d[9] == tenthline
Title,Released,Label,UK Chart Position,US Chart Position,BPI Certification,RIAA Certification
Please Please Me,22 March 1963,Parlophone(UK),1,-,Gold,Platinum
With the Beatles,22 November 1963,Parlophone(UK),1,-,Platinum,Gold
Beatlemania! With the Beatles,25 November 1963,Capitol(CAN),-,-,,
Introducing... The Beatles,10 January 1964,Vee-Jay(US),-,2,,
Meet the Beatles!,20 January 1964,Capitol(US),-,1,,5xPlatinum
Twist and Shout,3 February 1964,Capitol(CAN),-,-,,
The Beatles' Second Album,10 April 1964,Capitol(US),-,1,,2xPlatinum
The Beatles' Long Tall Sally,11 May 1964,Capitol(CAN),-,-,,
A Hard Day's Night,26 June 1964,United Artists(US)c,-,1,,4xPlatinum
,10 July 1964,Parlophone(UK),1,-,Gold,
Something New,20 July 1964,Capitol(US),-,2,,Platinum
Beatles for Sale,4 December 1964,Parlophone(UK),1,-,Gold,Platinum
Beatles '65,15 December 1964,Capitol(US),-,1,,3xPlatinum
Beatles VI,14 June 1965,"Parlophone(NZ), Capitol(US)",-,1,,Platinum
Help!,6 August 1965,Parlophone(UK),1,-,Platinum,
,13 August 1965,Capitol(US) c,-,1,,3xPlatinum
Rubber Soul,3 December 1965,Parlophone(UK),1,-,Platinum,
,6 December 1965,Capitol(US) c ,-,1,,6xPlatinum
Yesterday and Today,15 June 1966,Capitol(US),-,1,,2xPlatinum
Revolver,5 August 1966,Parlophone(UK),1,-,Platinum,
,8 August 1966,Capitol(US) c,-,1,,5xPlatinum
Sgt. Pepper's Lonely Hearts Club Band,1 June 1967,"Parlophone(UK), Capitol(US)",1,1,3xPlatinum,11xPlatinum
Magical Mystery Tour,27 November 1967,"Parlophone(UK), Capitol(US)",31[D],1,Platinum,6xPlatinum
The Beatles,22 November 1968,"Apple(UK), Capitol(US)",1,1,Platinum,19xPlatinum
Yellow Submarine,13 January 1969,"Apple(UK), Capitol(US)",3,2,Silver,Platinum
Abbey Road,26 September 1969,"Apple(UK), Capitol(US)",1,1,2xPlatinum,12xPlatinum
Let It Be,8 May 1970,"Apple(UK),United Artists(US)",1,1,Gold,4xPlatinum
reduced satisfaction
def set_preprocessing(num_variables, clauses):
    """Start simplifying a CNF formula by fixing variables that occur once.

    num_variables -- number of variables; they are indexed 1..num_variables.
    clauses       -- list of clauses, each a non-empty list of non-zero ints
                     whose absolute value is the variable index and whose
                     sign is the polarity.

    Raises AssertionError if a clause is empty.

    NOTE(review): the source ends in the middle of the unit-clause pass, so
    the part that would act on single-literal clauses, re-arm
    ``rules_applicable`` and return a result is not available. As written the
    loop runs once and the function returns None; the missing tail has not
    been invented here.
    """
    rules_applicable = True
    # Index 0 is unused: variables are 1-based, hence the "+ 1" sizing
    # (sizing by num_variables alone overflows on the highest variable).
    temp_assignment = [None] * (num_variables + 1)
    while rules_applicable:
        rules_applicable = False
        variable_counter = [0] * (num_variables + 1)
        var_setting = [None] * (num_variables + 1)
        # Count the occurrences of each variable and remember the polarity
        # of the occurrence seen last.
        for clause in clauses:
            for var in clause:
                avar = abs(var)
                variable_counter[avar] += 1
                var_setting[avar] = 1 if var > 0 else 0
        # A variable that occurs exactly once can take the value that
        # satisfies its only occurrence.
        for i, count in enumerate(variable_counter):
            if count != 1:
                continue
            if temp_assignment[i] is not None:
                continue
            temp_assignment[i] = var_setting[i]
        for clause in clauses:
            assert len(clause) != 0
            if len(clause) > 1:
                continue
            # Single-literal clause; the source stops right after these
            # two assignments.
            var = clause[0]
            avar = abs(var)
cover by tree
def vertex_cover_tree(input_graph):
    """Return the size of a minimum vertex cover of *input_graph*.

    input_graph -- square adjacency matrix (list of lists); an entry equal
                   to 1 marks an edge.

    Every vertex starts undecided (None) and the search tree below tries
    both choices for each of them.
    """
    n = len(input_graph)
    assignment = [None] * n
    return recursive_vertex_cover(input_graph, assignment)


def recursive_vertex_cover(input_graph, assignment):
    """Return the smallest cover size reachable from a partial *assignment*.

    assignment -- list with one entry per vertex: 1 = in the cover,
                  0 = not in the cover, None = undecided. It is modified
                  during the search and restored before returning.

    Returns ``float("inf")`` when some edge has both endpoints fixed to 0,
    i.e. the partial assignment can no longer become a cover.

    NOTE(review): the source broke off at ``size_v_0 = recursive_vertext_cover``
    (misspelled and unfinished). The branch on 1, the reset to None and the
    ``min`` are the completion implied by that name -- confirm.
    """
    n = len(input_graph)
    v = -1
    for i in range(n):
        if assignment[i] is None:
            v = i  # ends up as the last undecided vertex
        for j in range(i, n):
            if input_graph[i][j] == 1:
                # An edge with both endpoints excluded cannot be covered.
                if assignment[i] == 0 and assignment[j] == 0:
                    return float("inf")
    if v == -1:
        # Everything is decided and no edge is uncovered: count the cover.
        return sum(1 for chosen in assignment if chosen == 1)
    assignment[v] = 0
    size_v_0 = recursive_vertex_cover(input_graph, assignment)
    assignment[v] = 1
    size_v_1 = recursive_vertex_cover(input_graph, assignment)
    # Undo the choice so the caller sees the assignment it passed in.
    assignment[v] = None
    return min(size_v_0, size_v_1)
4color
from fourcolor import graph_is_4colorable


def graph_is_3colorbale(g):
    """Decide 3-colorability of *g* by reduction to 4-colorability.

    g -- square adjacency matrix (list of lists of 0/1); it is not modified.

    A new vertex adjacent to every original vertex is appended. In any
    proper 4-coloring that vertex uses up one color, leaving three for the
    original graph, so the extended graph is 4-colorable exactly when *g*
    is 3-colorable. The answer is whatever ``graph_is_4colorable`` returns
    for the extended matrix.
    """
    # Extra column: every original vertex is adjacent to the new vertex.
    h = [row + [1] for row in g]
    # Extra row: adjacent to all original vertices but not to itself. An
    # all-ones row would put a self-loop on the diagonal, which no proper
    # coloring can satisfy.
    h.append([1] * len(g) + [0])
    return graph_is_4colorable(h)


# Correctly spelled alias; the original name is kept for existing callers.
graph_is_3colorable = graph_is_3colorbale