update

a4413305 · rico.liu · 94ca7eb4 · a4413305
Commit a4413305 authored Mar 31, 2021 by rico.liu
Show whitespace changes
Inline Side-by-side

Showing with 15 additions and 3 deletions

ZOL_Crawler.py public/SKUParamsInfo/ZOL_Crawler.py +15 -3

No files found.
--- a/public/SKUParamsInfo/ZOL_Crawler.py
+++ b/public/SKUParamsInfo/ZOL_Crawler.py
@@ -144,6 +144,17 @@ class CRAWLER:
        source = source.text.replace('<br />','')          
        html = etree.HTML(source)
        
+        #get category and product name
+        try:
+            category = html.xpath("//div[@class='breadcrumb']/a[2]/text()")[0]
+        except:
+            category = '无法获取类别'
+        try:
+            product_name = html.xpath("//div[@class='breadcrumb']/a[4]/text()")[0]
+        except:
+            product_name = '无法获取产品名称'
+        
+        
        #get Zol attribute and value
        Zol_data = pd.DataFrame()
        attr_list = html.xpath("//span[contains(@id,'newPmName')]/text()")
@@ -194,7 +205,8 @@ class CRAWLER:
            if get_value == '':
                get_data_list.append("爬取不到数据") 
        
-        res = dict([(k,v) for k,v in zip(self.necessary_attrs,get_data_list)])
+        res = {'产品类别':category,'产品名称':product_name}
+        res.update(dict([(k,v) for k,v in zip(self.necessary_attrs,get_data_list)]))
        
        return res