init

745aa944 · rico.liu · cb5add1a · 745aa944 · 745aa944 · 745aa944
Commit 745aa944 authored Jan 09, 2020 by rico.liu
Hide whitespace changes
Inline Side-by-side

Showing with 585 additions and 0 deletions

checkData.py checkData.py +360 -0

initAPIData.py initAPIData.py +163 -0

matchSKU.py matchSKU.py +62 -0

No files found.
--- a/checkData.py
+++ b/checkData.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Jan  8 11:00:57 2020
+@author: rico
+"""
+from lxml import etree
+import re
+import requests
+import json
+def get_response(session,url,headers):
+        '''
+        deal timeout request
+        '''
+        network_status = True 
+        try:
+            response = session.get(url, headers=headers, timeout=5)
+            if response.status_code == 200:
+                return response
+        except:
+            network_status = False 
+            if network_status == False:
+                '''timeout'''
+                for i in range(1, 10):
+                    print('请求超时，第%s次重复请求' % i)
+                    try:    
+                        response = session.get(url, headers=headers, timeout=5)
+                        if response.status_code == 200:
+                            return response
+                    except:
+                        continue
+        return -1
+def checkData(check_data):
+    headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
+    session = requests.Session()
+    print('共'+str(len(check_data))+'条数据待审核')
+    result = []
+    id_all = []
+    sku_list = []
+    name_list = []
+    url_list = []
+    source_list = []
+    price_list = []
+    for i in range(len(check_data)):
+        df = check_data.loc[i]
+        date_id = df['id']
+        sku = df['sku']
+        name = df['name']
+        main_url = df['url'].strip()
+        source = df['source']
+        price = df['price']
+        print(main_url) 
+        if "jd" in str(main_url): 
+            try:
+                sku = main_url.split('/')[-1].split('.')[0]
+                #获取价格
+                url = "https://p.3.cn/prices/mgets?skuIds=" + str(sku)
+                r = get_response(session,url,headers).json()
+                jd_price = r[0]['p']
+                if len(jd_price) == 0:
+                    jd_price = price
+                if jd_price == '-1.00':
+                    jd_price = price
+                    price_list.append(jd_price)
+                    result.append("无货，请按要求提供在销渠道证明")
+                    id_all.append(date_id)
+                    sku_list.append(sku)
+                    name_list.append(name)
+                    url_list.append(main_url)
+                    source_list.append(source)
+                else:
+                    price_list.append(jd_price)
+                    #获取其他信息
+                    main_url_ = "https://item.jd.com/" + sku + ".html"
+                    r = get_response(session,main_url_,headers)
+                    html = etree.HTML(r.text)
+                    ziying = html.xpath(
+                        "//div[@class='name goodshop EDropdown']/em/text()")
+                    if "自营" in str(ziying):
+                            name = html.xpath(
+                                    "//div[@class='sku-name']/text()")
+                            if ("定制"in str(name)) or ("防弹"in str(name)) or ("射击"in str(name)) or ("订制"in str(name)) or ("卫星"in str(name)) or ("靶"in str(name)) or ("企业定制"in str(name)) or ("军迷"in str(name)) or ("携行具"in str(name)):
+                                print("定制/专用类产品暂不通过")
+                                result.append("定制/专用类产品暂不通过")
+                                id_all.append(date_id)
+                                sku_list.append(sku)
+                                name_list.append(name)
+                                url_list.append(main_url)
+                                source_list.append(source)
+                            else:
+                                #获取库存信息
+                                url = "https://c0.3.cn/stock?skuId="+str(sku)+"&area=1_2901_2906_0&cat=9987,653,655"
+                                r = get_response(session,url,headers)
+                                if r == -1:
+                                    print("通过")
+                                    result.append("通过")
+                                    id_all.append(date_id)
+                                    sku_list.append(sku)
+                                    name_list.append(name)
+                                    url_list.append(main_url)
+                                    source_list.append(source)
+                                else:
+                                    r.encoding='gbk'
+                                    is_purchase = json.loads(r.text)
+                                    try:
+                                        if  "无货" in is_purchase['stock']['stockDesc'] or "无货" in is_purchase['stock']['StockStateName']:
+                                            print("无货，请按要求提供在销渠道证明")
+                                            result.append("无货，请按要求提供在销渠道证明")
+                                            id_all.append(date_id)
+                                            sku_list.append(sku)
+                                            name_list.append(name)
+                                            url_list.append(main_url)
+                                            source_list.append(source)
+                                        else:
+                                            print("通过")
+                                            result.append("通过")  
+                                            id_all.append(date_id)
+                                            sku_list.append(sku)
+                                            name_list.append(name)
+                                            url_list.append(main_url)
+                                            source_list.append(source)
+                                    except:
+                                        if  "无货" in is_purchase['StockStateName']:
+                                            print("无货，请按要求提供在销渠道证明")
+                                            result.append("无货，请按要求提供在销渠道证明")
+                                            id_all.append(date_id)
+                                            sku_list.append(sku)
+                                            name_list.append(name)
+                                            url_list.append(main_url)
+                                            source_list.append(source)
+                                        else:
+                                            print("通过")
+                                            result.append("通过")
+                                            id_all.append(date_id)
+                                            sku_list.append(sku)
+                                            name_list.append(name)
+                                            url_list.append(main_url)
+                                            source_list.append(source)
+                    else:
+                        print("非自营，请按要求提供在销渠道证明")
+                        result.append("非自营，请按要求提供在销渠道证明")
+                        id_all.append(date_id)
+                        sku_list.append(sku)
+                        name_list.append(name)
+                        url_list.append(main_url)
+                        source_list.append(source)
+            except:
+                print("链接有误，请按要求提供在销渠道证明")
+                result.append("链接有误，请按要求提供在销渠道证明")
+                price_list.append(price)
+                id_all.append(date_id)
+                sku_list.append(sku)
+                name_list.append(name)
+                url_list.append(main_url)
+                source_list.append(source)
+        elif "gome" in str(main_url):
+            try:
+                main_url_1 = re.findall(".cn/(.*?).html",main_url)[0]
+                main_url_ = 'https://item.gome.com.cn/' + main_url_1 + '.html'
+                r = get_response(session,main_url_,headers)
+                html = etree.HTML(r.text)     
+                content = html.xpath("//script[contains(text(),'gomePrice')]/text()")[0]
+                gm_price = content.split('gomePrice:"')[1:][0].split('"')[0]
+                price_list.append(gm_price)
+                ziying = html.xpath(
+                    "//span[@class='identify']/text()")
+                if len(ziying) == 1:
+                        name = html.xpath(
+                                "//*[@id='gm-prd-main']/div[1]/h1/text()")
+                        if ("定制"in str(name)) or ("防弹"in str(name)) or ("射击"in str(name)) or ("订制"in str(name)) or ("卫星"in str(name)) or ("靶"in str(name)) or ("企业定制"in str(name)):
+                            print("定制/专用类产品暂不通过")
+                            result.append("定制/专用类产品暂不通过")
+                            id_all.append(date_id)
+                            sku_list.append(sku)
+                            name_list.append(name)
+                            url_list.append(main_url)
+                            source_list.append(source)
+                        else:
+                            #获取库存信息
+                            sku = main_url_.split('.html')[0].split('/')[-1].replace('-','/')
+                            url = "https://ss.gome.com.cn/item/v1/d/m/store/unite/"+str(sku)+"/N/11010200/110102002/1/null/flag/item/allStores?callback=allStores"
+                            r = get_response(session,url,headers)
+                            content = r.text.replace('allStores(','')
+                            content = content.replace(')','')
+                            content = json.loads(content)
+                            wuhuo = content['result']['stock']['status']
+                            if wuhuo == False:
+                                print("无货，请按要求提供在销渠道证明")
+                                result.append("无货，请按要求提供在销渠道证明")
+                                id_all.append(date_id)
+                                sku_list.append(sku)
+                                name_list.append(name)
+                                url_list.append(main_url)
+                                source_list.append(source)
+                            else:
+                                print("通过")
+                                result.append("通过")
+                                id_all.append(date_id)
+                                sku_list.append(sku)
+                                name_list.append(name)
+                                url_list.append(main_url)
+                                source_list.append(source)
+                else:
+                     print("非自营，请按要求提供在销渠道证明")
+                     result.append("非自营，请按要求提供在销渠道证明")
+                     id_all.append(date_id)
+                     sku_list.append(sku)
+                     name_list.append(name)
+                     url_list.append(main_url)
+                     source_list.append(source)
+            except:
+                print("链接有误，请按要求提供在销渠道证明")
+                result.append("链接有误，请按要求提供在销渠道证明")
+                price_list.append(price)
+                id_all.append(date_id)
+                sku_list.append(sku)
+                name_list.append(name)
+                url_list.append(main_url)
+                source_list.append(source)
+        elif "suning" in str(main_url):
+            try:
+                sku = re.findall(".com/(.*?).html",main_url)[0]
+                main_url_ = 'https://product.suning.com/' + sku + '.html'
+                r = get_response(session,main_url_,headers)
+                html = etree.HTML(r.text)
+                daaa = r.text
+                sn_price = price
+                str2 = html.xpath("//input[@id='curPartNumber']/@value")[0]
+                ziying1 = html.xpath("//div[@class='proinfo-title']/h1/span/i/text()")
+                ziying2 = html.xpath("//h1[@id='itemDisplayName']/span/text()")
+                youhuo_ = re.findall("id=\"ie7_onsale\" >(.*?)<i",daaa)
+                if "自营" in ziying1 or "自营" in ziying2:
+                    url_json = f'https://product.suning.com/pds-web/ajax/itemUniqueInfo_{str(str2)}_0000000000.html'
+                    response_json = get_response(session,url_json,headers)
+                    json_data = json.loads(response_json.text)
+                    itemDetail = json_data["itemDetail"]
+                    try:
+                        isPublished = itemDetail["isPublished"]
+                    except:
+                        isPublished = '0'
+                    product_name = itemDetail["cmmdtyTitle"]
+                    if isPublished == '1':
+                        if '此款有货' in str(youhuo_) :
+                            if ("定制"in str(product_name)) or ("防弹"in str(product_name)) or ("射击"in str(product_name)) \
+                                or ("订制"in str(product_name)) or ("卫星"in str(product_name)) \
+                                or ("靶"in str(product_name)) or ("企业定制"in str(product_name)) \
+                                or ("军迷"in str(product_name)) or ("携行具"in str(product_name)):
+                                print("定制/专用类产品暂不通过")
+                                result.append("定制/专用类产品暂不通过")
+                                price_list.append(sn_price)
+                                id_all.append(date_id)
+                                sku_list.append(sku)
+                                name_list.append(name)
+                                url_list.append(main_url)
+                                source_list.append(source)
+                            else:
+                                str11 = html.xpath("//input[@id='curPartNumber']/@value")[0]
+                                str22 = html.xpath("//input[@id='shop_code']/@value")[0]
+                                str33 = html.xpath("//input[@name='procateCode']/@value")[0]
+                                real_url = f'https://pas.suning.com/nspcsale_0_{str11}_{str11}_{str22}_10_010_0100100_157122_1000000_9017_10106_Z001___{str33}.html?callback=pcData'
+                                price_response = requests.get(real_url)
+                                sn_price = re.findall('"promotionPrice":"(.*?)",', price_response.text)[0]
+                                if len(sn_price) != 0:
+                                    price_list.append(sn_price)
+                                    print('通过')
+                                    result. append('通过')
+                                    id_all.append(date_id)
+                                    sku_list.append(sku)
+                                    name_list.append(name)
+                                    url_list.append(main_url)
+                                    source_list.append(source)
+                                else:
+                                    sn_price = price
+                                    price_list.append(sn_price)
+                                    print('该地区不销售（北京市丰台区）')
+                                    result. append('该地区不销售（北京市丰台区）')
+                                    id_all.append(date_id)
+                                    sku_list.append(sku)
+                                    name_list.append(name)
+                                    url_list.append(main_url)
+                                    source_list.append(source)
+                        else:
+                            print('无货，请按要求提供在销渠道证明')
+                            sn_price = price
+                            result. append('无货，请按要求提供在销渠道证明')
+                            price_list.append(sn_price)
+                            id_all.append(date_id)
+                            sku_list.append(sku)
+                            name_list.append(name)
+                            url_list.append(main_url)
+                            source_list.append(source)
+                    else:
+                        print('无货，请按要求提供在销渠道证明')
+                        sn_price = price
+                        result. append('无货，请按要求提供在销渠道证明')
+                        price_list.append(sn_price)
+                        id_all.append(date_id)
+                        sku_list.append(sku)
+                        name_list.append(name)
+                        url_list.append(main_url)
+                        source_list.append(source)
+                else:
+                    print('非自营，请按要求提供在销渠道证明')
+                    result. append('非自营，请按要求提供在销渠道证明')
+                    price_list.append(price)
+                    id_all.append(date_id)
+                    sku_list.append(sku)
+                    name_list.append(name)
+                    url_list.append(main_url)
+                    source_list.append(source)
+            except:
+                print("链接有误，请按要求提供在销渠道证明")
+                result.append("链接有误，请按要求提供在销渠道证明")
+                price_list.append(price)
+                id_all.append(date_id)
+                sku_list.append(sku)
+                name_list.append(name)
+                url_list.append(main_url)
+                source_list.append(source)
+        else:         
+            print("非三大电商，请按要求提供在销渠道证明")
+            result.append("非三大电商，请按要求提供在销渠道证明")
+            price_list.append(price)
+            id_all.append(date_id)
+            sku_list.append(sku)
+            name_list.append(name)
+            url_list.append(main_url)
+            source_list.append(source)
+        print(f"已经处理{i+1}条数据")  
+    check_data['审核意见'] = result
+    check_data['update_id'] = id_all
+    check_data['sku'] = sku_list
+    check_data['name'] = name_list
+    check_data['url'] = url_list
+    check_data['source'] = source_list
+    check_data['price'] = price_list
+    return check_data
\ No newline at end of file
--- a/initAPIData.py
+++ b/initAPIData.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Jan  8 10:00:34 2020
+@author: rico
+"""
+import pymssql
+import pandas as pd
+import datetime
+#from pyforest import *
+from matchSKU import matchSKU
+from checkData import checkData
+'''
+JD_ZH/ZH_DL/ZH_DW/ZH_DZ/ZH_LXWL/ZH_OFS/ZH_SN/ZH_YHD/ZH_ZCSM
+初始化API数据包含SKU排重、三大电商数据审核
+'''
+def initAPIData(base_frm):
+    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='admin@2018@)!*',database= f'{base_frm}',autocommit=True)
+    cursor = conn.cursor()
+    source = base_frm.replace('_','').replace('ZH','')
+    if source == 'JD':
+        pass
+    elif source == 'DL':
+        cursor.execute(f"insert into product_all(sku,name,brand,category,zi_subcategoryname,zi_subcategorycode,url,price,source,state)\
+                       select  distinct  a.sku,a.name,a.brand,f.name'sourcecategoryname',d.name'subcategoryname',c.zi_subcategorycode,a.url,e.price,'{base_frm}',a.state from \
+                       (select * from product_detail where sku not in (select sku from product_all ))a\
+                       left join Product_Category f\
+                       on a.category=f.categoryid\
+                       left join (select distinct zi_subcategorycode,sourcesubcategory  from ZI_DataBase.dbo.Product_Relation_Attribute_SubTitle where source='{source}') c\
+                       on f.name=c.sourcesubcategory\
+                       left join ZI_DataBase.dbo.zi_subcategory d\
+                       on c.zi_subcategorycode=d.subcategorycode\
+                       left join product_price e\
+                       on a.sku=e.sku")
+    else:
+        cursor.execute(f"insert into product_all(sku,name,brand,category,zi_subcategoryname,zi_subcategorycode,url,price,source,state)\
+                       select  distinct  a.sku,a.name,a.brand,a.category,d.name'subcategoryname',c.zi_subcategorycode,a.url,e.price,'{base_frm}',a.state from \
+                       (select * from product_detail where sku not in (select sku from product_all ))a\
+                       left join (select distinct zi_subcategorycode,sourcesubcategory  from ZI_DataBase.dbo.Product_Relation_Attribute_SubTitle where source='{source}') c\
+                       on a.category=c.sourcesubcategory\
+                       left join ZI_DataBase.dbo.zi_subcategory d\
+                       on c.zi_subcategorycode=d.subcategorycode\
+                       left join product_price e\
+                       on a.sku=e.sku")
+    if source in ['JD','SN','GM']:
+         #筛选出待审核的数据
+        cursor.execute("select id,sku,name,url,source,price from product_all where ziying is null and productcode is null")
+        data = (cursor.fetchall())
+        check_data = pd.DataFrame(data,columns= ['id','sku','name','url','source','price'])
+        #审核三大电商数据
+        check_data = checkData(check_data)
+        id_pass = []
+        id_no_ziying = []
+        id_dingzhi = []
+        id_other_nopass = []
+        print('补全价格')
+        for id_name,suggestion,price in zip(check_data['update_id'], check_data['审核意见'],check_data['price']):
+            cursor.execute( f"update product_all set price='{price}' where id= '{id_name}'")
+            conn.commit()
+            if suggestion == '通过':
+                id_pass.append(id_name)
+            elif suggestion == '非自营，请按要求提供在销渠道证明':
+                id_no_ziying.append(id_name)
+            elif suggestion == '定制/专用类产品暂不通过':
+                id_dingzhi.append(id_name)
+            else:
+                id_other_nopass.append(id_name)
+        print('更新数据库～')
+        for id_num in id_pass:
+            cursor.execute( f"update product_all set ziying='1',dingzhi='1',state='1' where id= '{id_num}'")
+            conn.commit()
+        for i in id_no_ziying:
+            cursor.execute( f"update product_all set ziying='2',dingzhi='1',state='2' where id='{i}'")
+            conn.commit()
+        for i in id_dingzhi:
+            cursor.execute( f"update product_all set ziying='1',dingzhi='2',state='2' where id='{i}'")
+            conn.commit()
+        for i in id_other_nopass:
+            cursor.execute( f"update product_all set ziying='2',dingzhi='1',state='2'  where id='{i}'")
+            conn.commit()
+        #SKU排重
+        cursor.execute("select distinct sku from product_all where productcode is null and state ='1'")
+        data = (cursor.fetchall())
+        sku_list = pd.DataFrame(data,columns= ['sku'])['sku'].tolist()
+        sku_check = matchSKU(sku_list,source)
+        print('sku排重中')
+        for k,v in sku_check.items():
+            if v == '0':
+                continue
+            else:
+                cursor.execute( f"update product_all set productcode='{v}' where sku='{k}'")
+                conn.commit()
+        print('sku排重完毕')    
+        cursor.close()
+        print('完成')
+        print('更新问题数据库')
+        conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='admin@2018@)!*',database= 'zi_zh',autocommit=True)
+        cursor = conn.cursor()
+        today = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
+        for sku,name,url,remark,source in zip(check_data['sku'],check_data['name'] ,check_data['url'],check_data['审核意见'],check_data['source']):
+            if remark == '通过':
+                continue
+            cursor.execute( f"insert into API_returnErrorData (sku,name,url,remark,create_time,source) VALUES ('{sku}','{name}','{url}','{remark}','{today}','{source}')")
+        cursor.close() 
+        print('完成')
+        print('数据初始化处理完成')
+        return None
+    else:
+        #SKU排重
+        cursor.execute("select distinct sku from product_all where productcode is null")
+        data = (cursor.fetchall())
+        sku_list = pd.DataFrame(data,columns= ['sku'])['sku'].tolist()
+        sku_check = matchSKU(sku_list,source)
+        print('sku排重中')
+        for k,v in sku_check.items():
+            if v == '0':
+                continue
+            else:
+                cursor.execute( f"update product_all set productcode='{v}' where sku='{k}'")
+                conn.commit()
+        print('sku排重完毕')
+        print('更新数据库～')
+        cursor.execute("select id from product_all where ziying is null and productcode is null")
+        data = (cursor.fetchall())
+        check_data = pd.DataFrame(data,columns= ['id'])
+        id_list = check_data['id'].tolist()
+        for _id in id_list:
+            cursor.execute( f"update product_all set ziying='1',dingzhi='1',state='1' where id= '{_id}'")
+            conn.commit()
+        print('数据初始化处理完成')
+        return None
+# Script entry point: running this file directly initialises the ZH_SN database.
+if __name__ == "__main__":
+    initAPIData('ZH_SN')
--- a/matchSKU.py
+++ b/matchSKU.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jan  7 15:26:43 2020
+@author: rico
+"""
+import pymssql
+import pandas as pd
+'''
+DL/DW/DZ/GM/JD/LXWL/OFS/SN/YHD/ZCSM
+'''
+def matchSKU(sku_list,frm):
+    conn = pymssql.connect(host='123.56.115.207', user='zgcprice3311',password='admin@2018@)!*',database='ZI_DataBase')
+    cursor = conn.cursor()
+    print('正在删除库中产品状态为6的sku，稍等。。。')#首先将库中错误状态产品的JD/SN/GM的SKU删除
+    cursor.execute("DELETE from productcode_sku where productcode in (select productcode from info_product where state='6')")
+    conn.commit()
+    print('正在获取当前库中所有sku,请稍等.....')#获取库中所有的SKU,以供检查
+    try:
+        get_all_sku = f"select  * from productcode_sku where frm in {frm}"
+        cursor.execute(get_all_sku)
+        data_sku = (cursor.fetchall())
+        data_sku_file = pd.DataFrame(data_sku,columns=['id','productcode','sku','skuname','createddate','frm'])
+    except:
+        get_all_sku = f"select  * from productcode_sku where frm='{frm}'"
+        cursor.execute(get_all_sku)
+        data_sku = (cursor.fetchall())
+        data_sku_file = pd.DataFrame(data_sku,columns=['id','productcode','sku','skuname','createddate','frm'])
+    print('sku获取完毕')
+    conn.close()
+    cursor.close() 
+    sku_check = {}
+    for sku in set(sku_list):
+        #print(sku)
+        #sku = 100004460761
+        if str(sku) in list(data_sku_file['sku']):
+            try:
+                product_no = [target for sku_source,target in zip(list(data_sku_file['sku']),list(data_sku_file['productcode'])) if str(sku) == sku_source]
+                sku_check[f'{sku}'] = product_no[0]
+            except:
+                sku_check[f'{sku}'] = '2' #匹配上多个SKU，属于问题数据
+        else:
+            if frm == 'SN':
+                sku_add = '0000000000/'+ str(sku)
+                if str(sku_add) in list(data_sku_file['sku']):
+                    try:
+                        product_no = [target for sku_source,target in zip(list(data_sku_file['sku']),list(data_sku_file['productcode'])) if str(sku_add) == sku_source]
+                        sku_check[f'{sku}'] = product_no[0]
+                    except:
+                        sku_check[f'{sku}'] = '2' #匹配上多个SKU，属于问题数据
+                else:
+                    sku_check[f'{sku}'] = '0'
+            else:
+                sku_check[f'{sku}'] = '0'
+    return sku_check