
Financial Data Analysis 3: Dangdang online shop product crawler, taking books as an example (requests & bs4)

Allen Ma · 2 minutes to read

Case (II): Crawler preview

Project 1: Dangdang online shop product crawler, taking books as an example

This case crawls the relevant page content using the bs4 library's find method.
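Before walking through the full script, here is a minimal sketch of how find locates a tag by class and how attributes and text are read from it. The HTML snippet is invented for illustration; it only mimics the shape of one Dangdang search-result item:

from bs4 import BeautifulSoup

# Hypothetical snippet shaped like one search-result <li>
html = '''
<li class="line1">
  <a class="pic" title="Python Crawling Basics" href="http://product.dangdang.com/123.html">
    <img src="http://img.dangdang.com/cover123.jpg">
  </a>
  <span class="search_now_price">&yen;39.50</span>
</li>
'''
soup = BeautifulSoup(html, 'html.parser')
li = soup.find('li', class_='line1')                     # first <li> whose class is "line1"
print(li.find('a', class_='pic').get('title'))           # attribute lookup -> Python Crawling Basics
print(li.find('span', class_='search_now_price').text)   # tag text -> ¥39.50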

# -*- coding: utf-8 -*-
import requests
import csv
from bs4 import BeautifulSoup as bs

# Fetch the HTML of one search-result page
def request_dandan(url):
    try:
        # Send a browser User-Agent so the request is not rejected
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
        r = requests.get(url, headers=headers)
        r.encoding = r.apparent_encoding  # Dangdang pages are not UTF-8; let requests detect the encoding
        if r.status_code == 200:
            return r.text
        return None
    except requests.RequestException:
        return None

# Write the header row to a fresh CSV file
def write_item_to_file():
    with open('dangdang.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Book title', 'Purchase link', 'Paperback price', 'Ebook price',
                         'Ebook link', 'Book details', 'Cover image URL', 'Review link',
                         'Author', 'Publication date', 'Publisher'])
    print('Column names successfully written to the CSV')

# Parse one page and append its rows to the CSV
def parse_dangdang_write(html):
    with open('dangdang.csv', 'a', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        soup = bs(html, 'html.parser')
        # Each of the (up to) 60 results on a page sits in an <li> with class "line1" ... "line60"
        class_tags = ['line' + str(x) for x in range(1, 61)]
        for class_tag in class_tags:
            li = soup.find('li', class_=class_tag)
            if li is None:  # page has fewer than 60 results
                continue
            book_name = li.find('a', class_='pic').get('title')  # book title
            paperbook_price = li.find('span', class_='search_now_price').text  # paperback price
            try:
                ebook_price = li.find('a', class_='search_e_price').find('i').text  # ebook price
                ebook_link = li.find('a', class_='search_e_price').get('href')  # ebook link
            except AttributeError:  # no ebook edition for this title
                ebook_price = ''
                ebook_link = ''
            detail = li.find('p', class_='detail').text  # book details
            book_purchase_link = li.find('a', class_='pic').get('href')  # purchase link for this book
            book_cover_link = li.find('a', class_='pic').find('img').get('src')  # cover image URL
            comment_link = li.find('a', class_='search_comment_num').get('href')  # review link
            author = li.find('p', class_='search_book_author').find('span').text  # author
            public_time = li.find('p', class_='search_book_author').find('span').next_sibling.text[2:]  # publication date
            public = li.find('p', class_='search_book_author').find('span').next_sibling.next_sibling.text[3:]  # publisher
            writer.writerow([book_name, book_purchase_link, paperbook_price, ebook_price,
                             ebook_link, detail, book_cover_link, comment_link,
                             author, public_time, public])

if __name__ == '__main__':
    write_item_to_file()
    for page in range(1, 10):  # crawl the first 9 result pages into the CSV file
        url = 'http://search.dangdang.com/?key=python%C5%C0%B3%E6&act=input&page_index=' + str(page)
        html = request_dandan(url)  # fetch the page HTML
        if html:  # skip pages that failed to download
            parse_dangdang_write(html)  # parse the page and append rows to the CSV
            print('Page {} data successfully written to the CSV'.format(page))
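The trickiest lines above are the ones that read the publication date and publisher via next_sibling. A small sketch shows why this works; the HTML structure here is inferred from the parsing code, not taken from Dangdang's actual markup, and the slice offsets [2:] and [3:] simply strip the leading separator characters:

from bs4 import BeautifulSoup

# Assumed shape of the author paragraph: three adjacent <span> siblings
html = ('<p class="search_book_author">'
        '<span><a>Allen Ma</a></span>'
        '<span> /2020-04-01</span>'
        '<span> / Some Press</span>'
        '</p>')
p = BeautifulSoup(html, 'html.parser').find('p', class_='search_book_author')
first = p.find('span')
print(first.text)                                # author -> Allen Ma
print(first.next_sibling.text[2:])               # drop " /"  -> 2020-04-01
print(first.next_sibling.next_sibling.text[3:])  # drop " / " -> Some Press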

Results of the run: screenshots of the generated dangdang.csv file (images omitted).
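To sanity-check the output without opening the file in Excel, the CSV can be read back with the standard library. This is just a usage sketch; the file name and column order match the script above:

import csv

with open('dangdang.csv', newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    header = next(reader)       # the column names written by write_item_to_file()
    for i, row in enumerate(reader):
        print(row[0], row[2])   # book title and paperback price
        if i >= 4:              # show only the first five books
            break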