RPA-亚马逊商品评论采集。
背景
需求来源:跨境电商客户需要对指定商品的评论进行监控,该应用为上述场景的自动化实现
所属行业:电商
前置条件
暂无
亚马逊网址:Amazon.cn
操作流程
1. 第一步:循环模板表格中的商品链接,前往相应的商品详情页
2. 第二步:抓取页面上的文字元素
1)商品无评论
2)商品评论较少,无【查看全部xx条商品评论】按钮


3)商品评论较多,点击【查看全部xx条商品评论】进入评论页获取,并根据采集数量判断是否需要点击下一页

4)采集数量大于实际评论数量,下一页按钮置灰,直接退出循环

3. 第三步:下载评论图片/视频
若采集范围选中了下载评论图片或下载评论视频,且评论中含有图片或视频,则在指定文件夹中新建一个名为【亚马逊商品评论附件】的文件夹,将图片或视频下载到该文件夹中,并将本地路径写入结果文件的【评论图片/视频】列(附件文件名可以自定义,如1.jpg、2.mp4,只要不重复即可)。

4. 第四步:表格处理+网页关闭
将数据都写入结果文件后,处理表格样式
关闭打开的网页,运行完成后最小化浏览器窗口
结果展示

该RPA应用通过自然机器人RPA软件开发完成,应用本身也归属于自然机器人。欢迎大家了解自然机器人。
主要代码:
1,获取excel文件
# Step 1: validate the user-supplied paths, then open the template workbook.
# Any failure is logged, reported through Debug_Block_Error, and ends the run
# with exit status 1.
try:
    # Reject template files whose extension is not an Excel workbook format.
    if param_value_1 is not None and os.path.splitext(param_value_1)[1] not in (".xlsx", ".xls", ".xlsm"):
        raise Exception('模板文件名不正确')
    # The directory that holds the template must exist.
    if not os.path.exists(os.path.dirname(param_value_1)):
        raise Exception('模板文件所在目录不存在')
    # The folder passed in param_value_4 must exist as well.
    if not os.path.exists(param_value_4):
        raise Exception('文件夹路径不存在')

    # The Excel driver is reached through `ntr.app.office` when that attribute
    # exists, otherwise through `ntr.app.microsoft`.
    if hasattr(ntr.app, 'office'):
        excel_obj_2 = ntr.app.office.excel.open(
            param_value_1, visible=True, readonly=False, password=r"",
            write_password=None, dispatch="auto")
    else:
        excel_obj_2 = ntr.app.microsoft.excel.open(
            param_value_1, visible=True, readonly=False, password=r"",
            write_password=None)
    file_path_1 = param_value_1
except (SDKError, Exception) as e:
    error = '{0}'.format(e)
    logger.error('Error-打开Excel-出现异常 ' + error)
    Debug_Block_Error('Error-打开Excel-ooown9092t6z', error, True)
    sys.exit(1)
2,获取excel文件信息
# Step 2: read the product links from column A of the template sheet.
# On failure the error is logged and the run ends with exit status 1.
try:
    sheet_obj_2 = excel_obj_2.get_sheet()
    cell_row_cnt_1 = sheet_obj_2.row_count()
    # Reading starts at row 2, so row 1 is skipped
    # (presumably a header row — confirm against the template).
    iamrko = sheet_obj_2.read("A2:A" + str(cell_row_cnt_1))
    # Concatenate the lists returned by read() into one flat list
    # (assumes read() yields a list of lists — TODO confirm with the SDK).
    iamrko = sum(iamrko, [])
except Exception:
    # `except Exception` rather than a bare `except` so that
    # KeyboardInterrupt / SystemExit are not swallowed here.
    logger.error("获取excel信息出错")
    sys.exit(1)
3,获取评论、图片、视频的主要代码
def get_messagee(a,n,excel_cell_value_2,web_text_1):try:name = re.search(r'(.*?)', a)name = name.group(1)score = re.search(r'', a)score = score.group(1).split(".")[0]time = re.search(r'(.*?)', a)time = time.group(1)content = re.search(r'([\s]*?)(.*?)',a)if content != None:content = content.group(2)else:content = "无"path2 = ""if param_value_3 == "评论加评论视频" or param_value_3 =="评论加评论图片加评论视频":video = re.search(r'data-video-url="(.*?)"', a)if video != None:video = video.group(1)reps = requests.get(video)path = file_fujian + "\\"+str(n)while True:if os.path.exists(path+".mp4"):path = path+str(n)else:breakpath=path+".mp4"with open(path, "wb") as f:f.write(reps.content)path2 = path2+" "+pathif param_value_3 == "评论加评论图片" or param_value_3 == "评论加评论图片加评论视频":picture = re.findall(r'
',a)if picture != []:list = []for i in picture:j = re.sub(r'SY88', 'SL1600', i)if j not in list:list.append(j)ii = 1for o in list:reps = requests.get(o)path = file_fujian + "\\" + str(n) + "_" + str(ii)while True:if os.path.exists(path+".jpg"):path = path + str(ii)else:breakpath = path + ".jpg"with open(path, "wb") as f:f.write(reps.content)ii = ii + 1path2 = path2 + " " + pathregion = r"G" + str(n)sheet_obj_4.write(region, path2, start_row=1, start_col='A', max=100000)try:region = r"A" + str(n)sheet_obj_4.write(region, excel_cell_value_2, start_row=1, start_col='A', max=100000)region = r"B" + str(n)sheet_obj_4.write(region, web_text_1, start_row=1, start_col='A', max=100000)region = r"C" + str(n)sheet_obj_4.write(region, name, start_row=1, start_col='A', max=100000)region = r"D" + str(n)sheet_obj_4.write(region, score, start_row=1, start_col='A', max=100000)region = r"E" + str(n)sheet_obj_4.write(region, time, start_row=1, start_col='A', max=100000)region = r"F" + str(n)sheet_obj_4.write(region, content, start_row=1, start_col='A', max=100000)except:logger.error("messagee_写入错误")except:logger.error("get_messagee异常")
4,信息写入excel的主要代码
def get_message():
    """Process every review element on the current page.

    Counts the elements matching the review selector in ``web_obj_1``, then
    fetches the HTML of each one and passes it to ``get_messagee`` together
    with the next free row number of ``sheet_obj_4``.

    Reads the module-level names ``web_obj_1``, ``sheet_obj_4``,
    ``excel_loop_item_1`` and ``web_text_1``. Errors are logged, not raised.
    """
    # Bound before the try block so the error handler can always log it; the
    # original referenced `n` in the handler even when the failure happened
    # before `n` was assigned, which raised UnboundLocalError.
    n = None
    try:
        web_element_cnt_3 = visual.element.element_count(
            "element_1a786967-4b1e-4ba5-94f2-d62901dd6e09",
            elem_type="Chrome", window=web_obj_1, timeout=10)
        sleep(1)
        # New rows are appended after the last row currently in the sheet.
        n = sheet_obj_4.row_count()
        # The `index` argument of get_html is 1-based here.
        for range_item_1 in range(1, int(web_element_cnt_3) + 1):
            web_html_1 = visual.element.get_html(
                "element_1a786967-4b1e-4ba5-94f2-d62901dd6e09",
                elem_type="Chrome", index=range_item_1, window=web_obj_1,
                timeout=10)
            n = n + 1
            get_messagee(web_html_1, n, excel_loop_item_1, web_text_1)
            # NOTE(review): the published listing lost its indentation, so it
            # is unclear whether this pause ran per review or once after the
            # loop — confirm against the original project.
            sleep(1)
    except Exception:
        logger.error("get_message_异常", n)
主线程代码:
try:for excel_loop_item_1 in iamrko:passi=i+1web_obj_1 = visual.browser.create("chrome", excel_loop_item_1, wait=True, visible=True, timeout=100)sleep(2)web_element_display_5 = visual.element.wait_appear(element="element_f373f2af-a11b-4570-8576-cfa320c068cd",elem_type="Chrome", index=1, window=web_obj_1,timeout=10)if (web_element_display_5 == True):try:os.makedirs(yanzhengma)except:passtry:web_html_2 = visual.element.get_html("element_f373f2af-a11b-4570-8576-cfa320c068cd", elem_type="Chrome",index=1, window=web_obj_1, timeout=10)except:logger.error("获取代码异常")print(web_html_2)try:pic = re.search('
num:a = num//10b = num%10if a == 0:# 获取网页相似元素文本get_message()web_obj_1.close()continuefor i in range(a):#获取网页相似元素文本get_message()#点击下一页操作if num <= (i+1)*10:break visual.element.click(element="element_8409c521-abc3-449d-a2a8-d0f07fbcfd96", elem_type="Chrome",click_type="left_once", simulate=False, index=1, window=web_obj_1,timeout=10)get_message2(b)web_obj_1.close()continueelse:c = param_value_2 // 10b = param_value_2 % 10if c == 0:if b > web_element_cnt_3:get_message2(web_element_cnt_3)else:get_message2(b)# 获取网页相似元素文本web_obj_1.close()continueelse:for i in range(c):#获取网页相似元素文本get_message()#点击下一页操作if param_value_2 <= (i+1)*10:breakvisual.element.click(element="element_8409c521-abc3-449d-a2a8-d0f07fbcfd96", elem_type="Chrome",click_type="left_once", simulate=False, index=1, window=web_obj_1,timeout=10)web_element_cnt_8 = visual.element.element_count( "element_1a786967-4b1e-4ba5-94f2-d62901dd6e09", elem_type="Chrome", window=web_obj_1,timeout=10)if web_element_cnt_8 < b:get_message2(web_element_cnt_8)else:get_message2(b)web_obj_1.close()continueexcept:logger.error("下载全部评论异常")else:web_element_cnt_3 = visual.element.element_count("element_809a9e72-8911-4667-a1f9-7386c6476abc",elem_type="Chrome", window=web_obj_1, timeout=10)try:if web_element_cnt_3 == 0:cell_row_cnt_3 = sheet_obj_4.row_count()n = cell_row_cnt_3n = n + 1region = r"A" + str(n)sheet_obj_4.write(region, excel_loop_item_1, start_row=1, start_col='A', max=100000)region = r"B" + str(n)sheet_obj_4.write(region, web_text_1, start_row=1, start_col='A', max=100000)region = r"C" + str(n)sheet_obj_4.write(region, "无", start_row=1, start_col='A', max=100000)region = r"D" + str(n)sheet_obj_4.write(region, "无", start_row=1, start_col='A', max=100000)region = r"E" + str(n)sheet_obj_4.write(region, "无", start_row=1, start_col='A', max=100000)region = r"F" + str(n)sheet_obj_4.write(region, "无", start_row=1, start_col='A', max=100000)web_obj_1.close()continueexcept:logger.error("无评论写入错误")web_obj_1.close()continueif 
(param_value_2 == "全部"):passget_message3()web_obj_1.close()continueelse:#统计相似元素数量,和输入值相比,if web_element_cnt_3
最后,希望RPA的应用越来越广。
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
