ZGC_INDEX / checkData

Commit 6f9d10e5, authored Jan 08, 2020 by rico.liu
Commit message: init
Parent: 0e3b9fd7
Pipeline #130 failed with stages

Showing 1 changed file with 236 additions and 0 deletions:
checkData.py (new file, mode 100644, +236 -0)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 8 11:00:57 2020
@author: rico
"""
from lxml import etree
import re
import requests
import json


def get_reponse(session, url, headers):
    '''
    Fetch a URL with the shared session, handling timed-out requests.
    Returns the response on HTTP 200, otherwise -1 once the retries are exhausted.
    '''
    network_status = True
    try:
        response = session.get(url, headers=headers, timeout=5)
        if response.status_code == 200:
            return response
    except Exception:
        network_status = False
    if not network_status:
        # The first request timed out: retry up to nine more times.
        for i in range(1, 10):
            print('请求超时,第%s次重复请求' % i)  # request timed out, retry attempt %s
            try:
                response = session.get(url, headers=headers, timeout=5)
                if response.status_code == 200:
                    return response
            except Exception:
                continue
    return -1


def checkData(check_data):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
    }
    session = requests.Session()
    print('共' + str(len(check_data)) + '条数据待审核')  # "<n> records awaiting review"
    result = []
    id_all = []
    sku_list = []
    name_list = []
    url_list = []
    source_list = []
    for i in range(len(check_data)):
        df = check_data.loc[i]
        date_id = df['id']
        sku = df['sku']
        name = df['name']
        main_url = df['url'].strip()
        source = df['source']
        print(main_url)
        if "jd" in str(main_url):
            r = get_reponse(session, main_url, headers)
            html = etree.HTML(r.text)
            # JD self-operated ("自营") listings carry the shop badge in this element.
            ziying = html.xpath("//div[@class='name goodshop EDropdown']/em/text()")
            if "自营" in str(ziying):
                name = html.xpath("//div[@class='sku-name']/text()")
                # Reject customised / restricted product categories by keyword match on the title.
                if ("定制" in str(name)) or ("防弹" in str(name)) or ("射击" in str(name)) \
                        or ("订制" in str(name)) or ("卫星" in str(name)) or ("靶" in str(name)) \
                        or ("企业定制" in str(name)) or ("军迷" in str(name)) or ("携行具" in str(name)):
                    print("定制/专用类产品暂不通过")  # customised/special-purpose products are not approved for now
                    result.append("定制/专用类产品暂不通过")
                    id_all.append(date_id)
                    sku_list.append(sku)
                    name_list.append(name)
                    url_list.append(main_url)
                    source_list.append(source)
                else:
                    print("通过")  # approved
                    result.append("通过")
                    id_all.append(date_id)
                    sku_list.append(sku)
                    name_list.append(name)
                    url_list.append(main_url)
                    source_list.append(source)
            else:
                print("非自营,请按要求提供在销渠道证明")  # not self-operated; provide proof of an active sales channel
                result.append("非自营,请按要求提供在销渠道证明")
                id_all.append(date_id)
                sku_list.append(sku)
                name_list.append(name)
                url_list.append(main_url)
                source_list.append(source)
        elif "gome" in str(main_url):
            #try_ = session.get(main_url,headers=headers)
            # Rebuild the canonical Gome item URL from the product id in the original link.
            main_url_1 = re.findall(".cn/(.*?).html", main_url)[0]
            main_url_ = 'https://item.gome.com.cn/' + main_url_1 + '.html'
            r = get_reponse(session, main_url_, headers)
            html = etree.HTML(r.text)
            #content = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
            # The "identify" badge only appears on self-operated listings.
            ziying = html.xpath("//span[@class='identify']/text()")
            if len(ziying) == 1:
                name = html.xpath("//*[@id='gm-prd-main']/div[1]/h1/text()")
                if ("定制" in str(name)) or ("防弹" in str(name)) or ("射击" in str(name)) \
                        or ("订制" in str(name)) or ("卫星" in str(name)) or ("靶" in str(name)) \
                        or ("企业定制" in str(name)):
                    print("定制/专用类产品暂不通过")
                    result.append("定制/专用类产品暂不通过")
                    id_all.append(date_id)
                    sku_list.append(sku)
                    name_list.append(name)
                    url_list.append(main_url)
                    source_list.append(source)
                else:
                    print("通过")
                    result.append("通过")
                    id_all.append(date_id)
                    sku_list.append(sku)
                    name_list.append(name)
                    url_list.append(main_url)
                    source_list.append(source)
            else:
                print("非自营,请按要求提供在销渠道证明")
                result.append("非自营,请按要求提供在销渠道证明")
                id_all.append(date_id)
                sku_list.append(sku)
                name_list.append(name)
                url_list.append(main_url)
                source_list.append(source)
        elif "suning" in str(main_url):
            #sku = main_url.split('.html')[0].split('/')[-1].replace('-','/')
            #main_url_ = 'http://product.suning.com/0000000000/10115687173.html'
            #main_url = 'http://product.suning.com/0000000000/10530903341.html'
            r = get_reponse(session, main_url, headers)
            html = etree.HTML(r.text)
            #daaa = r.text
            #sn_price = df.价格
            #sn_price = '58.00-558.00'
            str2 = html.xpath("//input[@id='curPartNumber']/@value")[0]
            ziying1 = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
            ziying2 = html.xpath("//h1[@id='itemDisplayName']/span/text()")
            #youhuo_ = re.findall("id=\"ie7_onsale\" >(.*?)<i",daaa)
            if "自营" in ziying1 or "自营" in ziying2:
                #daohuo = html.xpath("//a[@id='tellMe']/span/text()")
                # Pull the item detail JSON to get the published product title.
                url_json = f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(str2)}_0000000000.html'
                response_json = get_reponse(session, url_json, headers)
                json_data = json.loads(response_json.text)
                itemDetail = json_data["itemDetail"]
                try:
                    isPublished = itemDetail["isPublished"]
                except Exception:
                    isPublished = '0'
                try:
                    product_name = itemDetail["cmmdtyTitle"]
                except Exception:
                    product_name = '满足要求名称'  # fallback title that passes the keyword check
                if ("定制" in str(product_name)) or ("防弹" in str(product_name)) or ("射击" in str(product_name)) \
                        or ("订制" in str(product_name)) or ("卫星" in str(product_name)) \
                        or ("靶" in str(product_name)) or ("企业定制" in str(product_name)) \
                        or ("军迷" in str(product_name)) or ("携行具" in str(product_name)):
                    result.append("定制/专用类产品暂不通过")
                    sku_list.append(sku)
                    name_list.append(name)
                    url_list.append(main_url)
                    source_list.append(source)
                    id_all.append(date_id)
                else:
                    str11 = html.xpath("//input[@id='curPartNumber']/@value")[0]
                    str22 = html.xpath("//input[@id='shop_code']/@value")[0]
                    str33 = html.xpath("//input[@name='procateCode']/@value")[0]
                    # Default delivery address is Fengtai District, Beijing.
                    real_url = f'https://pas.suning.com/nspcsale_0_{str11}_{str11}_{str22}_10_010_0100100_157122_1000000_9017_10106_Z001___{str33}.html?callback=pcData'
                    price_response = requests.get(real_url)
                    sn_price = re.findall('"promotionPrice":"(.*?)",', price_response.text)[0]
                    if len(sn_price) != 0:
                        try:
                            print('通过')
                            sn_price = float(sn_price)
                            #price.append(sn_price)
                            result.append('通过')
                            id_all.append(date_id)
                            sku_list.append(sku)
                            name_list.append(name)
                            url_list.append(main_url)
                            source_list.append(source)
                        except Exception:
                            print('该链接无法定位到唯一商品')  # the link does not resolve to a single product
                            result.append('该链接无法定位到唯一商品')
                            id_all.append(date_id)
                            sku_list.append(sku)
                            name_list.append(name)
                            url_list.append(main_url)
                            source_list.append(source)
                    else:
                        print('无货,请按要求提供在销渠道证明')  # out of stock; provide proof of an active sales channel
                        result.append('无货,请按要求提供在销渠道证明')
                        id_all.append(date_id)
                        sku_list.append(sku)
                        name_list.append(name)
                        url_list.append(main_url)
                        source_list.append(source)
            else:
                print('非自营,请按要求提供在销渠道证明')
                result.append('非自营,请按要求提供在销渠道证明')
                id_all.append(date_id)
                sku_list.append(sku)
                name_list.append(name)
                url_list.append(main_url)
                source_list.append(source)
        else:
            # The URL matches none of the supported platforms (JD / Gome / Suning).
            print("非自营,请按要求提供在销渠道证明")
            result.append("非自营,请按要求提供在销渠道证明")
            id_all.append(date_id)
            sku_list.append(sku)
            name_list.append(name)
            url_list.append(main_url)
            source_list.append(source)
        print(f"已经处理{i+1}条数据")  # progress: processed i+1 records so far
    # Write the review results back onto the input frame and return it.
    check_data['审核意见'] = result  # review verdict column
    check_data['update_id'] = id_all
    check_data['sku'] = sku_list
    check_data['name'] = name_list
    check_data['url'] = url_list
    check_data['source'] = source_list
    return check_data
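
Usage note: checkData expects a pandas-style DataFrame with id, sku, name, url and source columns and a 0..n-1 integer index (rows are read with check_data.loc[i]). Below is a minimal, hypothetical calling sketch; the CSV file names are assumptions for illustration, not part of this commit.

# Hypothetical usage sketch (file names assumed, not part of this commit).
import pandas as pd
from checkData import checkData

pending = pd.read_csv('pending_review.csv')   # expected columns: id, sku, name, url, source
pending = pending.reset_index(drop=True)      # checkData reads rows via check_data.loc[i]
reviewed = checkData(pending)                 # adds the 审核意见 (review verdict) column
reviewed.to_csv('reviewed.csv', index=False)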