Commit 6f9d10e5 authored by rico.liu's avatar rico.liu

init

parent 0e3b9fd7
Pipeline #130 failed with stages
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 8 11:00:57 2020
@author: rico
"""
from lxml import etree
import re
import requests
import json
def get_reponse(session, url, headers):
    """Fetch *url* through ``session.get``, retrying when the request raises.

    The first attempt and each of the up-to-nine retries use a 5-second
    timeout.

    Args:
        session: Object exposing ``get(url, headers=..., timeout=...)``,
            for example a ``requests.Session``.
        url: Address to fetch.
        headers: Header mapping forwarded unchanged to every attempt.

    Returns:
        The first response whose ``status_code`` is 200. ``-1`` is returned
        when the first attempt is answered with any other status (such a
        reply is not retried) or when the first attempt and all nine
        retries fail. Callers must check for ``-1`` before touching the
        result.
    """
    try:
        response = session.get(url, headers=headers, timeout=5)
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit stop the crawl rather than
        # being treated as a timeout; fall through to the retry loop.
        pass
    else:
        # A reply that arrived but is not 200 is reported as a failure
        # straight away; only raised errors are retried.
        return response if response.status_code == 200 else -1

    for attempt in range(1, 10):
        print('请求超时,第%s次重复请求' % attempt)
        try:
            response = session.get(url, headers=headers, timeout=5)
        except Exception:
            continue
        if response.status_code == 200:
            return response
    return -1
# Verdicts written to the '审核意见' column.
_VERDICT_PASS = "通过"
_VERDICT_RESTRICTED = "定制/专用类产品暂不通过"
_VERDICT_NOT_SELF_OPERATED = "非自营,请按要求提供在销渠道证明"
_VERDICT_OUT_OF_STOCK = '无货,请按要求提供在销渠道证明'
_VERDICT_NOT_UNIQUE = '该链接无法定位到唯一商品'
# Reported when a page could not be downloaded at all (get_reponse gave -1).
_VERDICT_FETCH_FAILED = '页面请求失败,请稍后重试'

# Product-title keywords that block approval.
# NOTE(review): the Gome check has always used the shorter list; confirm
# whether the two extra keywords should apply there as well.
_GOME_KEYWORDS = ("定制", "防弹", "射击", "订制", "卫星", "靶", "企业定制")
_RESTRICTED_KEYWORDS = _GOME_KEYWORDS + ("军迷", "携行具")


def _mentions_any(texts, keywords):
    """Return True when any keyword occurs inside any of the given texts."""
    return any(keyword in str(text) for text in texts for keyword in keywords)


def _first(values):
    """Return the first element of *values*, or None when it is empty."""
    return values[0] if values else None


def _fetch_tree(session, url, headers):
    """Download *url* and parse it with lxml; None when the download failed."""
    response = get_reponse(session, url, headers)
    if isinstance(response, int):  # get_reponse signals failure with -1
        return None
    return etree.HTML(response.text)


def _review_jd(session, url, headers):
    """Return the verdict for a JD product page."""
    tree = _fetch_tree(session, url, headers)
    if tree is None:
        return _VERDICT_FETCH_FAILED
    shop_labels = tree.xpath(
        "//div[@class='name goodshop EDropdown']/em/text()")
    if not _mentions_any(shop_labels, ("自营",)):
        return _VERDICT_NOT_SELF_OPERATED
    titles = tree.xpath("//div[@class='sku-name']/text()")
    if _mentions_any(titles, _RESTRICTED_KEYWORDS):
        return _VERDICT_RESTRICTED
    return _VERDICT_PASS


def _review_gome(session, url, headers):
    """Return the verdict for a Gome product page."""
    # Rebuild the canonical item URL from the product path of the submitted link.
    found = re.search(r"\.cn/(.*?)\.html", url)
    if found is None:
        return _VERDICT_NOT_UNIQUE
    item_url = 'https://item.gome.com.cn/' + found.group(1) + '.html'
    tree = _fetch_tree(session, item_url, headers)
    if tree is None:
        return _VERDICT_FETCH_FAILED
    # Exactly one 'identify' badge is taken as the self-operated marker.
    badges = tree.xpath("//span[@class='identify']/text()")
    if len(badges) != 1:
        return _VERDICT_NOT_SELF_OPERATED
    titles = tree.xpath("//*[@id='gm-prd-main']/div[1]/h1/text()")
    if _mentions_any(titles, _GOME_KEYWORDS):
        return _VERDICT_RESTRICTED
    return _VERDICT_PASS


def _review_suning(session, url, headers):
    """Return the verdict for a Suning product page."""
    tree = _fetch_tree(session, url, headers)
    if tree is None:
        return _VERDICT_FETCH_FAILED

    badges_title = tree.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
    badges_name = tree.xpath("//h1[@id='itemDisplayName']/span/text()")
    # The badge text has to equal "自营" exactly (list membership).
    if "自营" not in badges_title and "自营" not in badges_name:
        return _VERDICT_NOT_SELF_OPERATED

    part_number = _first(tree.xpath("//input[@id='curPartNumber']/@value"))
    if part_number is None:
        return _VERDICT_NOT_UNIQUE

    info_url = f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{part_number}_0000000000.html'
    info = get_reponse(session, info_url, headers)
    if isinstance(info, int):  # -1: the detail request failed
        return _VERDICT_FETCH_FAILED
    try:
        detail = json.loads(info.text)["itemDetail"]
    except (ValueError, KeyError, TypeError):
        return _VERDICT_NOT_UNIQUE
    try:
        title = detail["cmmdtyTitle"]
    except (KeyError, TypeError):
        # No title available: fall back to a name that passes the keyword check.
        title = '满足要求名称'
    if _mentions_any([title], _RESTRICTED_KEYWORDS):
        return _VERDICT_RESTRICTED

    shop_code = _first(tree.xpath("//input[@id='shop_code']/@value"))
    category = _first(tree.xpath("//input[@name='procateCode']/@value"))
    if shop_code is None or category is None:
        return _VERDICT_NOT_UNIQUE

    # Per the original author's note, the fixed codes in this URL select the
    # default delivery address: Fengtai District, Beijing.
    price_url = f'https://pas.suning.com/nspcsale_0_{part_number}_{part_number}_{shop_code}_10_010_0100100_157122_1000000_9017_10106_Z001___{category}.html?callback=pcData'
    try:
        # requests never times out unless told to; without this a stalled
        # price endpoint would hang the whole batch.
        price_response = requests.get(price_url, timeout=5)
    except requests.RequestException:
        return _VERDICT_FETCH_FAILED

    price = re.search('"promotionPrice":"(.*?)",', price_response.text)
    if price is None:
        return _VERDICT_NOT_UNIQUE
    price_text = price.group(1)
    if not price_text:
        return _VERDICT_OUT_OF_STOCK
    try:
        float(price_text)
    except ValueError:
        # Not a single number, so the link does not resolve to one item.
        return _VERDICT_NOT_UNIQUE
    return _VERDICT_PASS


# Checked in order; the marker is matched as a substring of the whole URL.
_SITE_REVIEWERS = (
    ("jd", _review_jd),
    ("gome", _review_gome),
    ("suning", _review_suning),
)


def _review_url(session, url, headers):
    """Pick the site-specific check for *url* and return its verdict."""
    for marker, reviewer in _SITE_REVIEWERS:
        if marker in url:
            return reviewer(session, url, headers)
    # Links to any other site are handled like non-self-operated listings.
    return _VERDICT_NOT_SELF_OPERATED


def checkData(check_data):
    """Review every product link in *check_data* and record a verdict per row.

    Each row's ``url`` is stripped, fetched and checked according to the
    site it points to (JD, Gome or Suning). The verdict is printed and
    stored in the '审核意见' column.

    Differences from the previous implementation:
      * a page that cannot be downloaded or parsed yields a verdict for
        that row instead of raising and aborting the batch;
      * the ``name`` column keeps the submitted name for every row (the JD
        and Gome branches used to overwrite it with a list of scraped text
        nodes);
      * rows are visited by position, so a non-default index is accepted.

    Args:
        check_data: pandas DataFrame with the columns ``id``, ``sku``,
            ``name``, ``url`` and ``source``.

    Returns:
        The same DataFrame object, modified in place: '审核意见' holds the
        verdict, ``update_id`` copies ``id``, ``url`` holds the stripped
        link, and ``sku``/``name``/``source`` are written back unchanged.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
    print('共'+str(len(check_data))+'条数据待审核')

    verdicts = []
    ids = []
    skus = []
    names = []
    urls = []
    sources = []

    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        for processed, (_, row) in enumerate(check_data.iterrows(), start=1):
            url = row['url'].strip()
            print(url)
            verdict = _review_url(session, url, headers)
            print(verdict)

            verdicts.append(verdict)
            ids.append(row['id'])
            skus.append(row['sku'])
            names.append(row['name'])
            urls.append(url)
            sources.append(row['source'])
            print(f"已经处理{processed}条数据")

    check_data['审核意见'] = verdicts
    check_data['update_id'] = ids
    check_data['sku'] = skus
    check_data['name'] = names
    check_data['url'] = urls
    check_data['source'] = sources
    return check_data
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment