Terminology for AI

1. Fully versus partially observable

-perception action cycle
Agent, State
(sensors, actuators)

2.Deterministic versus stochastic

3.Discrete versus continuous

4. Benign (no objective) versus adversarial (such as chess and other games)

for example:
robot car -> partially observable, stochastic, continuous, adversarial

The Basics of AI

An AI program is called
・INTELLIGENT AGENT

how does agent make a decision?
AI has successfully been used in
-finance
-robotics
-games
-medicine
-the web

ex.
trading agent for stock market, bonds market, commodity
->with online news, buy or sell decision

AI in Robotics
camera, microphone, touch
-> motors, voice

AI in games
A game agent plays against you: your moves and its own moves.

AI in medicine
A diagnostic agent gets vital signals

AI on the web
crawler

Reducer Code

def reducer():
	"""Sum the sale values per key read from stdin and print one total per key.

	Each input line must hold exactly two tab-separated fields, a key and a
	numeric sale value; any other line is skipped. Lines sharing a key must
	be adjacent, because a total is emitted whenever the key changes.
	Output lines have the form "<key><TAB><total>".
	"""
	# Imported locally so the function works even if the module has no
	# top-level `import sys`.
	import sys

	salesTotal = 0
	oldKey = None

	for line in sys.stdin:
		data = line.strip().split("\t")

		if len(data) != 2:
			# Malformed line: ignore it rather than crash on unpacking.
			continue

		thisKey, thisSale = data

		# Key changed: flush the finished key's total and start a new sum.
		if oldKey is not None and oldKey != thisKey:
			print("{0}\t{1}".format(oldKey, salesTotal))
			salesTotal = 0

		oldKey = thisKey
		salesTotal += float(thisSale)

	# Flush the final key, which no later key change would ever trigger.
	if oldKey is not None:
		print("{0}\t{1}".format(oldKey, salesTotal))

Defensive Mapper

def mapper():
	"""Emit "<store><TAB><cost>" for every well-formed sales line on stdin.

	A well-formed line has exactly six tab-separated fields:
	date, time, store, item, cost, payment. Lines with any other number of
	fields are skipped instead of raising on unpacking.
	"""
	# Imported locally so the function works even if the module has no
	# top-level `import sys`.
	import sys

	for line in sys.stdin:
		data = line.strip().split("\t")

		if len(data) != 6:
			# Defensive check: ignore malformed records.
			continue

		date, time, store, item, cost, payment = data
		print("{0}\t{1}".format(store, cost))

Using match

def get_db(db_name):
	"""Return the database named `db_name` from a MongoClient for 'localhost:27017'."""
	from pymongo import MongoClient
	return MongoClient('localhost:27017')[db_name]

def make_pipeline():
	"""Return the aggregation pipeline; currently a new, empty list of stages."""
	return []

def aggregate(db, pipeline):
	"""Run `pipeline` on the `tweets` collection of `db` and return the results as a list."""
	results = db.tweets.aggregate(pipeline)
	return list(results)

if __name__ == '__main__':
	# Imported here as in the original snippet; nothing below calls pprint.
	import pprint

	db = get_db('twitter')
	pipeline = make_pipeline()
	result = aggregate(db, pipeline)

	# The pipeline is expected to yield exactly one document,
	# whose "followers" value is 17209.
	assert len(result) == 1
	assert result[0]["followers"] == 17209

twitter data-set

{
	"_id" : ObjectID("xxxx"),
	"text" : "Something interesting ...",
	"entities" : {
		"user_mentions" : [
			{
				"screen_name" : "somebody_else",
				...
			}
		],
		"urls" : [],
		"hashtags": []
	},
	"user" : {
		"friends_count" : 544,
		"screen_name" : "somebody",
		"followers_count" : 100,
	}
}
from pymongo import MongoClient
import pprint

# Module-level connection; `db` is the "twitter" database read by most_tweets().
client = MongoClient("mongodb://localhost:27017")
db = client["twitter"]

def most_tweets():
	"""Aggregate db.tweets: count tweets per user screen name, highest count first.

	Returns whatever `db.tweets.aggregate` returns for the two-stage
	pipeline (a `$group` by "$user.screen_name", then a descending `$sort`
	on the count).
	"""
	count_per_user = {"$group": {"_id": "$user.screen_name", "count": {"$sum": 1}}}
	highest_first = {"$sort": {"count": -1}}
	return db.tweets.aggregate([count_per_user, highest_first])

if __name__ == '__main__':
	# Script entry point: run the aggregation and pretty-print what it returns.
	result = most_tweets()
	pprint.pprint(result)

Insert into the DB

import json

def insert_data(data, db):
	"""Insert the documents in `data` into the `arachnid` collection of `db`.

	data -- assumed to be a list of dict documents (TODO confirm against
	        the JSON file the caller loads).
	db   -- a database handle exposing an `arachnid` collection.

	Nothing is inserted when `data` is empty, so the insert call is never
	handed an empty batch.
	"""
	if data:
		db.arachnid.insert_many(data)

if __name__ == "__main__":
	from pymongo import MongoClient

	# Connect to the local server and select the "examples" database.
	client = MongoClient("mongodb://localhost:27017")
	db = client["examples"]

	# Load the JSON file, hand it to insert_data, then show one stored document.
	with open('arachnid.json') as f:
		data = json.load(f)
		insert_data(data, db)
		print(db.arachnid.find_one())

Preparing data

import codecs
import csv
import json
import pprint
import re

# CSV file read by test().
DATAFILE = 'arachnid.csv'

# Maps CSV column names (keys) to the field names used in the output (values).
FIELDS = {
    'rdf-schema#label': 'label',
    'URI': 'uri',
    'rdf-schema#comment': 'description',
    'synonym': 'synonym',
    'name': 'name',
    'family_label': 'family',
    'class_label': 'class',
    'phylum_label': 'phylum',
    'order_label': 'order',
    'kingdom_label': 'kingdom',
    'genus_label': 'genus',
}

def process_file(filename, fields):
	"""Read the CSV at `filename` and return the list of processed rows.

	filename -- path of the CSV file, opened with csv.DictReader.
	fields   -- mapping of CSV column names to output field names; not
	            used yet because the row processing is still a placeholder.

	The first three rows after the header are skipped (presumably metadata
	rows -- TODO confirm against the data file). The remaining rows are
	iterated but not yet transformed, so the returned list is currently
	always empty.
	"""
	data = []
	with open(filename, "r") as f:
		reader = csv.DictReader(f)

		# The built-in next() works on Python 2.6+ and 3; the default
		# keeps a file with fewer than three rows from raising StopIteration.
		for _ in range(3):
			next(reader, None)

		for line in reader:
			# TODO: build an output document from `line` and append it to `data`.
			pass
	return data

def parse_array(v):
	"""Split a brace-wrapped, pipe-separated string into a list of values.

	If `v` starts with "{" and ends with "}", the braces are stripped, the
	rest is split on "|", and each piece is whitespace-trimmed. Any other
	string, including the empty string, is returned unchanged as a
	one-element list.
	"""
	# startswith/endswith are safe on "", unlike indexing v[0] / v[-1].
	if v.startswith("{") and v.endswith("}"):
		inner = v.lstrip("{").rstrip("}")
		return [item.strip() for item in inner.split("|")]
	return [v]

def test():
	"""Check process_file(DATAFILE, FIELDS) against known expected entries.

	Prints the first processed entry, then asserts the total entry count
	and a handful of specific field values.
	"""
	data = process_file(DATAFILE, FIELDS)
	print("your first entry:")
	pprint.pprint(data[0])
	first_entry = {
		"synonym": None,
		"name": "Argiope",
		"classification" : {
			"kingdom":"Animal",
			"family":"Orb-weaver spider",
			"order": "Spider",
			"phylum": "Arthropod",
			"genus": None,
			"class": "Arachnid"
		},
		"uri": "http://dbpedia.org/resource/Argiope_(spider)",
		"label":"Argiope",
		"description": "The genus Argiope includes rather large and spectacular spiders that often have a strikingly coloured abdomen. These spiders are distributed throughout the world. Most countries in tropical or temperate climates host one or more species that are similar in appearance. The etymology of the name is from a Greek name meaning silver-faced."
	}
	# `==` compares; the original `=` here was a syntax error.
	assert len(data) == 76
	assert data[0] == first_entry
	assert data[17]["name"] == "Ogdenia"
	assert data[48]["label"] == "Hydrachnidiae"
	assert data[14]["synonym"] == ["Cyrene Peckham & Peckham"]

if __name__ == "__main__":
	# Call the test function; a bare `test` only names it and runs nothing.
	test()

Range Queries

#!/usr/bin/env python
import pprint

# Module-level connection; `db` is the "examples" database queried by find().
client = MongoClient("mongodb://localhost:27017")
db = client["examples"]

def find():
	"""Pretty-print every city whose population exceeds 250000, then print the match count."""
	big_cities = db.cities.find({"population" : {"$gt" : 250000}})

	# enumerate(..., 1) leaves the running count in num_cities; it stays 0
	# when the cursor yields nothing.
	num_cities = 0
	for num_cities, city in enumerate(big_cities, 1):
		pprint.pprint(city)

	print("\nNumber of cities matching: %d\n" % num_cities)
from datetime import datetime

def range_query():
	"""Return the query document for the range query; currently a new, empty dict."""
	return {}

def get_db():
	"""Return the "examples" database from a MongoClient for 'localhost:27017'."""
	from pymongo import MongoClient
	return MongoClient('localhost:27017')["examples"]

if __name__ == "__main__":
	import pprint

	# Run the range query against the cities collection.
	db = get_db()
	query = range_query()
	cities = db.cities.find(query)

	# Report how many documents matched, then show the first one.
	print("Found cities: {0}".format(cities.count()))
	pprint.pprint(cities[0])

Multiple Field

from pymongo import MongoClient
import pprint

# Module-level connection; `db` is the "examples" database queried by find().
client = MongoClient("mongodb://localhost:27017")
db = client["examples"]

def find():
	"""Pretty-print every autos document matching both field constraints.

	The query requires "manufacturer" to equal "Toyota" and "class" to
	equal "mid-size car".
	"""
	autos = db.autos.find(
		{
			"manufacturer" : "Toyota" , "class": "mid-size car"
		})
	# The loop belongs at function-body level; the original extra indent
	# was an IndentationError.
	for a in autos:
		pprint.pprint(a)

if __name__ == '__main__':
	# Script entry point: run find() when this file is executed directly.
	find()
#!/usr/bin/env python
from autos import process_file

def insert_autos(infile, db):
	"""Process `infile` with process_file and insert the result into `db.autos`.

	infile -- path handed to process_file.
	db     -- a database handle exposing an `autos` collection.

	Nothing is inserted when process_file returns no data.
	"""
	data = process_file(infile)

	# assumes process_file returns a list of dict documents -- TODO confirm
	# against the autos module
	if data:
		db.autos.insert_many(data)

if __name__ == "__main__":
	from pymongo import MongoClient

	# Connect to the local server and select the "examples" database.
	client = MongoClient("mongodb://localhost:27017")
	db = client["examples"]

	# Load the CSV through insert_autos, then show one document from db.autos.
	insert_autos('autos-small.csv', db)
	print(db.autos.find_one())