Mapper and Reducer

import sys
import string
import logging

from util import mapper_logfile
# Route log records at INFO level and above to the mapper's log file.
# filemode='w' truncates the file on every run, and the format keeps only
# the bare message text.
logging.basicConfig(filename=mapper_logfile, format='%(message)s',
	level=logging.INFO, filemode='w')

def mapper():
    """Emit one ``key<TAB>value`` line for each well-formed CSV row on stdin.

    Every input line is stripped and split on commas.  A row is skipped
    when it does not have exactly 12 fields or when its first field is
    ``'Register'``.  For each remaining row, field 3 (the key) and field 8
    (the value) are written to stdout separated by a tab.
    """
    for line in sys.stdin:
        data = line.strip().split(",")

        # NOTE(review): the 'Register' test presumably skips the CSV header
        # row -- confirm the header's first column is spelled exactly this
        # way, otherwise the header is passed through to the reducer.
        if len(data) != 12 or data[0] == 'Register':
            continue

        # Single-argument print(...) behaves the same on Python 2 and 3,
        # unlike the Python 2-only print statement.
        print("{0}\t{1}".format(data[3], data[8]))

# Script entry point: the mapper runs over stdin as soon as this code is loaded.
mapper()
import sys
import logging

from util import reducer_logfile
# Route log records at INFO level and above to the reducer's log file.
# filemode='w' truncates the file on every run, and the format keeps only
# the bare message text.
logging.basicConfig(filename=reducer_logfile, format='%(message)s',
	level=logging.INFO, filemode='w')

def reducer():
    """Sum the values of consecutive same-key lines read from stdin.

    Each input line is expected to look like ``key<TAB>count``.  Lines that
    do not split into exactly two tab-separated fields are ignored.  Counts
    are parsed with ``float`` and accumulated while the key stays the same;
    whenever the key changes, and once more at end of input, a line
    ``key<TAB>total`` is written to stdout.

    Only the previous key is remembered, so the input must arrive grouped
    by key for each key to produce a single total.

    Raises:
        ValueError: if a count field cannot be parsed by ``float``.
    """
    aadhaar_generated = 0
    old_key = None

    for line in sys.stdin:
        data = line.strip().split("\t")

        if len(data) != 2:
            continue

        this_key, count = data

        # Key changed: flush the finished group and start a new total.
        # Compare against None explicitly so a falsy key can never
        # suppress the flush.
        if old_key is not None and old_key != this_key:
            # Single-argument print(...) works on both Python 2 and 3.
            print("{0}\t{1}".format(old_key, aadhaar_generated))
            aadhaar_generated = 0

        old_key = this_key
        aadhaar_generated += float(count)

    # Flush the last group; nothing is printed if no valid line was seen.
    if old_key is not None:
        print("{0}\t{1}".format(old_key, aadhaar_generated))

# Script entry point: the reducer runs over stdin as soon as this code is loaded.
reducer()

MapReduce programming model -> Hadoop!
(1) Hive, (2) Pig
Mahout, Giraph, Cassandra

Using MapReduce with subway data