proginn2130409762 2022年07月28日
63阅读

作品详情

# Scrape customer comments from a JD.com product page with Selenium, save
# them to jd.csv, then segment the saved text with jieba and render a word
# cloud image.
#
# Requires a local Chrome + chromedriver and the font file "msyh.ttc".
import time
import re
import csv
import jieba
import wordcloud
# import faker
from faker import Faker
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

fx = Faker()  # not referenced anywhere below; kept from the original script

TARGET_COMMENTS = 1000   # stop once at least this many comments are collected
MAX_PAGES = 1000         # hard upper bound on the number of pages visited
BOTTOM_OFFSET_PX = 2426  # distance above the page bottom to scroll back to

SCROLL_TO_BOTTOM = 'window.scrollTo(0,document.body.scrollHeight)'

tm = 2    # index of the pagination link that was clicked last
li = []   # one {'京东评论': text} dict per scraped comment
com = 0   # running count of scraped comments

drive = webdriver.Chrome()
try:
    drive.get('https://item.jd.com/100026667910.html')
    drive.execute_script(SCROLL_TO_BOTTOM)
    time.sleep(2)
    drive.maximize_window()
    time.sleep(2)
    # Fifth tab of the detail tab bar -- presumably the reviews tab; confirm
    # against the live page.
    drive.find_element(By.XPATH, '//div[@class="ETab"]/div/ul/li[5]').click()
    time.sleep(2)

    for t in range(MAX_PAGES):
        # Scroll to the bottom twice with a pause in between, then back up to
        # a fixed offset above the bottom (the original author's "precision
        # strike"; presumably this brings the comment list into view).
        drive.execute_script(SCROLL_TO_BOTTOM)
        time.sleep(4)
        drive.execute_script(SCROLL_TO_BOTTOM)
        page_height = drive.find_element(By.XPATH, './html').size['height']
        fd = page_height - BOTTOM_OFFSET_PX
        drive.execute_script(f'window.scrollTo(0,{fd})')
        time.sleep(2)

        # At most the first ten child <div>s of the comment container.
        comment_nodes = drive.find_elements(
            By.XPATH, '//div[@id="comment-0"]/div')[:10]
        for node in comment_nodes:
            what = node.find_element(By.XPATH, './/div[2]/p').text
            # Flatten multi-line comments so each one is a single CSV field
            # without embedded newlines.
            worth = {'京东评论': what.replace('\n', '')}
            com += 1
            print(f'已爬取{com}条数据')
            li.append(worth)
        print(f'第{t+1}页爬完')

        # ">=" rather than "==": a page with fewer than ten comments can make
        # the counter step over the target without ever being equal to it.
        if com >= TARGET_COMMENTS:
            break

        # The link index advances from 3 up to 6 and then stays at 6.
        if tm < 6:
            tm += 1
        try:
            drive.find_element(
                By.XPATH, f'//div[@class="ui-page"]/a[{tm}]').click()
        except NoSuchElementException:
            # No further page link: stop paging and keep what was collected
            # instead of crashing before the CSV is written.
            break
        time.sleep(2)
finally:
    # Always close the browser, even if scraping fails midway.
    drive.quit()

# Save the scraped comments to jd.csv.
with open('jd.csv', 'w', encoding='utf-8', newline='') as f:
    write = csv.DictWriter(f, fieldnames=['京东评论'])
    write.writeheader()
    write.writerows(li)

# Read back the text that was just saved and build the word cloud from it.
with open("jd.csv", encoding="utf-8") as f:
    s = f.read()

ls = jieba.lcut(s)    # segment the text into a list of words
text = ' '.join(ls)   # join the words into one space-separated string
print(text)

# The original passed the raw string `s` as `stopwords`. WordCloud iterates
# that argument, so a string is treated as a collection of its individual
# characters; set(s) states the same thing explicitly. Net effect: tokens that
# consist of a single character occurring in the text are excluded.
# NOTE(review): if real stop words are wanted, pass a set of words here instead.
wc = wordcloud.WordCloud(
    font_path="msyh.ttc",  # local system font; an absolute path also works
    width=1000,
    height=700,
    background_color='white',
    max_words=100,
    stopwords=set(s),
)
wc.generate(text)              # build the cloud from the segmented text
wc.to_file("京东好评爬取.png")  # save the word cloud image
查看全文
声明:本文仅代表作者观点,不代表本站立场。如果侵犯到您的合法权益,请联系我们删除侵权资源!如果遇到资源链接失效,请您通过评论或工单的方式通知管理员。未经允许,不得转载,本站所有资源文章禁止商业使用运营!
下载安装【程序员客栈】APP
实时对接需求、及时收发消息、丰富的开放项目需求、随时随地查看项目状态

评论