Beautiful Soupを使ってAmazon Web Service
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Search books through the Amazon web service and parse the XML reply.

Written for Python 2: it relies on ``urllib2`` and on BeautifulSoup 3
(``BeautifulStoneSoup``).
"""

import urllib
import urllib2

from BeautifulSoup import BeautifulStoneSoup


def _to_utf8(value):
    """Return *value* unchanged unless it is unicode text, then UTF-8 bytes."""
    if isinstance(value, unicode):
        return value.encode("utf-8")
    return value


def _quote(value):
    """Percent-encode *value* for use as a query-string key or value."""
    if not isinstance(value, basestring):
        value = str(value)
    return urllib.quote(_to_utf8(value), safe="")


def _first_content(tag, default=""):
    """Return the first child of *tag*, or *default* if it is missing/empty."""
    if tag is None or not tag.contents:
        return default
    return tag.contents[0]


def _format_utf8(template, *values):
    """Fill the byte-string *template* with *values* encoded as UTF-8.

    Formatting a non-ASCII byte template with unicode arguments makes
    Python 2 decode the template with the default (ASCII) codec and fail,
    so every unicode argument is encoded first and the result stays bytes.
    """
    return template % tuple(_to_utf8(value) for value in values)


class AmazonClient(object):
    """Build an ItemSearch request URL and turn the reply into BookData."""

    # Search by title.
    TITLE = "Title"
    # Search by author name.
    AUTHOR = "Author"
    # Search by ISBN.
    ISBN = "ISBN"

    # Amazon endpoint URL.
    __amazonUrl = "http://webservices.amazon.co.jp/onca/xml"

    # Default request parameters; ACCESS_KEY_ID is a placeholder that has
    # to be replaced with a real access key before requests succeed.
    __request_params = (
        "Service=AWSECommerceService",
        "AWSAccessKeyId=ACCESS_KEY_ID",
        "Operation=ItemSearch",
        "SearchIndex=Books",
        "ResponseGroup=Large",
        "ReviewPage=1",
    )

    def __init__(self, searchParameters=None):
        """Prepare the request URL.

        searchParameters -- mapping of search key (TITLE, AUTHOR, ISBN, ...)
            to search value.  When omitted, a title search for "python" is
            used.  Keys and values are percent-encoded, so pass them raw.
        """
        if searchParameters is None:
            searchParameters = {self.TITLE: "python"}

        query = list(self.__request_params)
        for key, value in searchParameters.items():
            query.append("%s=%s" % (_quote(key), _quote(value)))
        self.request = self.__amazonUrl + "?" + "&".join(query)

    def doRequest(self):
        """Send the request and return a list of BookData.

        Returns an empty list when the reply has no <Items> element.
        Errors raised by urllib2.urlopen are not caught here.
        """
        response = urllib2.urlopen(self.request)
        try:
            xml = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        soup = BeautifulStoneSoup(xml)
        items = soup.find("items")
        if items is None:
            return []

        bookList = []
        for item in items:
            # Children that cannot be searched (e.g. text between tags) and
            # elements without item attributes are skipped.
            if getattr(item, "itemattributes", None):
                bookList.append(self.__createBookData(item))
        return bookList

    def __createBookData(self, item):
        """Convert one parsed item element into a BookData.

        Returns None when item is empty or has no item attributes.
        Elements missing from the reply leave the BookData defaults intact.
        """
        if not item or not item.itemattributes:
            return None

        attributes = item.itemattributes
        book = BookData()

        if item.smallimage is not None:
            book.smallimageUrl = _first_content(
                item.smallimage.url, book.smallimageUrl)
        if item.mediumimage is not None:
            book.mediumimageUrl = _first_content(
                item.mediumimage.url, book.mediumimageUrl)
        # The large image is looked up on the item, like the other two
        # sizes, and stored in the attribute that BookData declares.
        if item.largeimage is not None:
            book.largeimageUrl = _first_content(
                item.largeimage.url, book.largeimageUrl)

        book.title = _first_content(attributes.title, book.title)

        authors = [author.contents[0]
                   for author in attributes.findAll("author")
                   if author.contents]
        if authors:
            book.author = ",".join(authors)

        book.isbn = _first_content(attributes.isbn, book.isbn)

        reviews = item.customerreviews
        if reviews is not None:
            book.averagerating = _first_content(
                reviews.averagerating, book.averagerating)
            book.reviewList = [self.__createReviewData(review)
                               for review in reviews.findAll("review")]
        return book

    def __createReviewData(self, review):
        """Convert one parsed review element into a ReviewData."""
        reviewData = ReviewData()
        reviewData.rating = _first_content(review.rating, reviewData.rating)
        reviewData.summary = _first_content(
            review.summary, reviewData.summary)
        # Line-break markup is stripped from the review body.
        reviewData.content = _first_content(
            review.content, "").replace("<br />", "")
        return reviewData


class BookData(object):
    """Book data."""

    __to_str = "[タイトル: %s \r\n 著者: %s \r\n ISBN: %s]"

    def __init__(self):
        self.smallimageUrl = ""
        self.mediumimageUrl = ""
        self.largeimageUrl = ""
        self.title = ""
        self.author = ""
        self.isbn = ""
        self.price = 0
        self.averagerating = 0.0
        self.reviewList = []

    def __str__(self):
        """Return title, author and ISBN as a UTF-8 encoded byte string."""
        return _format_utf8(
            BookData.__to_str, self.title, self.author, self.isbn)


class ReviewData(object):
    """Review data."""

    __to_str = "[評価: %s \r\n タイトル: %s \r\n 本文: %s]"

    def __init__(self):
        self.rating = 0.0
        self.summary = ""
        self.content = ""

    def __str__(self):
        """Return rating, summary and body as a UTF-8 encoded byte string."""
        return _format_utf8(
            ReviewData.__to_str, self.rating, self.summary, self.content)


if __name__ == "__main__":
    params = {AmazonClient.AUTHOR: "東野圭吾"}
    client = AmazonClient(params)
    for book in client.doRequest():
        print(book)
        # Blank line between books.
        print("")
ACCESS_KEY_IDを自分のに変えるとちゃんと検索できる。
レビューもちゃんととれるし、日本語でも検索できる。
すばらしい、こんなに簡単にできる。
__request_paramsのパラメータを変えれば、本以外の商品や出版社など、色々な条件で検索できる。
[参考]
Amazon Web サービス入門
http://www.ajaxtower.jp/ecs/
http://d.hatena.ne.jp/aidiary/20081220/1229787394