修复了一些特殊情况下的报错，以及去除了列号输入，直接从表中读取列名

9e886ce1 · Zhouxingyu · 5d87ab32 · 9e886ce1 · 9e886ce1 · 9e886ce1
Commit 9e886ce1 authored Aug 09, 2019 by Zhouxingyu
7 changed files
--- a/__pycache__/data_add_class.cpython-35.pyc
+++ b/__pycache__/data_add_class.cpython-35.pyc
--- a/__pycache__/functions.cpython-35.pyc
+++ b/__pycache__/functions.cpython-35.pyc
--- a/data_add_class.py
+++ b/data_add_class.py
@@ -5,22 +5,34 @@ from functions import *
 class Data_add():
-    def __init__(self,table_path,out_path,pcode_col,brand_col,price_col,type_col,ser_col,data_col):
+    def __init__(self,table_path,out_path):
        '''
        依次输入表格路径，输出路径，productcode，品牌，价格，型号，系列对应的列号,以及一个原始数据或者不需要预测数据的列号。
        '''
        self._table = data_load(table_path).reset_index(drop = True)
        self._path_out = out_path
-        self._data_col = data_col
+        lyst = list(self._table.columns)   
-        self._pcode_col = pcode_col
+        for i in range(len(lyst)):
-        self._brand_col = brand_col
+            if lyst[i] == 'productname':
-        self._price_col = price_col
+                self._data_col = i
-        self._type_col = type_col
+            if lyst[i] == 'productcode':
-        self._ser_col = ser_col
+                self._pcode_col = i
+            if lyst[i] == 'brandname':
+                self._brand_col = i
+            if lyst[i] == 'price':
+                self._price_col = i
+            if lyst[i] == '产品型号':
+                self._type_col = i
+            if lyst[i] == '产品系列':
+                self._ser_col = i
+        try:
+            lyst_test = [self._data_col,self._pcode_col,self._brand_col,self._price_col,self._type_col,self._ser_col]
+        except AttributeError:
+            print('没有找到指定的列，请修改data_add_class中的代码或者检查excel表格。')
+        #print(self._data_col,self._pcode_col,self._brand_col,self._price_col,self._type_col,self._ser_col)
    def data_add_main(self):
-        print('数据装填完毕！')
        some_lyst=[self._data_col,self._pcode_col,self._price_col,self._brand_col,self._type_col]
        n = self._table.shape[1]   #列
        lyst=[]
@@ -31,6 +43,11 @@ class Data_add():
            else:
                lyst_null=moudle11(self._table,i,self._pcode_col)
                for j in range(len(lyst_null)):
-                    table=data_add(self._table,lyst_null[j],i,self._pcode_col,self._brand_col,self._price_col,self._type_col,self._ser_col)
+                    try:
+                        table=data_add(self._table,lyst_null[j],i,self._pcode_col,self._brand_col,self._price_col,self._type_col,self._ser_col)
+                    except AttributeError:
+                        return 0
+        print('数据装填完毕！')
+        loading()
        print("填写空缺值完毕！")
        pre(table,self._path_out)
\ No newline at end of file
--- a/functions.py
+++ b/functions.py
@@ -2,6 +2,7 @@ import difflib
 import re
 import pandas as pd
 import xlrd
+import time
 def str_split(string):
    '''
@@ -28,10 +29,14 @@ def string_similar(s1, s2):
    return difflib.SequenceMatcher(None, s1, s2).quick_ratio()
-def pcode2Line_num(table,pcode,col=2):
+def pcode2Line_num(table,pcode):
    '''
    productcode转行号，请根据pcode的实际列数设置col的数值。
    '''
+    lyst = list(table.columns)
+    for i in range(len(lyst)):
+        if lyst[i] == 'productcode':
+            col = i
    data_col = list(table.iloc[:, col])
    n= data_col.index(pcode)
    return n
@@ -55,10 +60,19 @@ def moudle9(table,mat_pcode,pre_pcode,col):
    return table
-def moudle10(table,pre_pcode,ser_col=15,type_col=9):
+def moudle10(table,pre_pcode,brand_col,pcode_col,ser_col=15,type_col=9):
    '''
    ser_col为系列号，type_col为型号。函数将型号按照规律赋给系列号。
    '''
+    brand = str(table.iloc[pcode2Line_num(table,pre_pcode),brand_col])
+    brand_lyst = list(table.iloc[:,brand_col])
+    same_brand_pcode_lyst = []
+    lyst = []
+    for i in range(len(brand_lyst)):
+        if brand_lyst[i] == brand:
+            same_brand_pcode_lyst.append(table.iloc[i,pcode_col])
+            lyst.append(i)
    str2pre=str_split(str(table.iloc[pcode2Line_num(table,pre_pcode),type_col]))
    str2out=[]
    if is_alphabet(str2pre[0])==False:
@@ -74,7 +88,8 @@ def moudle10(table,pre_pcode,ser_col=15,type_col=9):
                str2out.append(str2pre[i])
            else: break
    str2out=''.join(str2out)
-    table.iloc[pcode2Line_num(table,pre_pcode),ser_col]=str2out
+    for i in range(len(same_brand_pcode_lyst)):
+        table.iloc[pcode2Line_num(table,same_brand_pcode_lyst[i]),ser_col]=str2out
    return table
@@ -222,6 +237,9 @@ def moudle11(table,col,pcode_col):
            lyst.append(pcode_lyst[i])
    return lyst
+def loading():
+    print('正在填写预测数据。。。。')
+    time.sleep(15)
 def data_load(path):
    data = xlrd.open_workbook(path)
@@ -255,12 +273,13 @@ def xlsx_to_csv_pd(csv_path,xlsx_path):
    data_xls.to_csv(xlsx_path, encoding='utf-8')
-def moudle12(old_table,pre_pcode,price_col,pcode_col):
+def moudle12(old_table,pre_pcode,price_col,pcode_col,brand_col):
    '''
    在整个品牌都缺失某项参数，将价格最相近的产品的参数赋予这个空值。
    '''
    price_col_data = list(old_table.iloc[:, price_col])
    price_col_data = list(map(lambda x:float(x), price_col_data))          #字符串转化为浮点型
+    brand_col_data = list(old_table.iloc[:, brand_col])
    pcode_col_data = list(old_table.iloc[:, pcode_col])
@@ -269,10 +288,11 @@ def moudle12(old_table,pre_pcode,price_col,pcode_col):
    min_sub=abs(price_col_data[max_row]-price_line)
    for i in range(1,len(price_col_data)):
-        sub = abs(price_col_data[i]-price_line)
+        if  brand_col_data[i] != old_table.iloc[pcode2Line_num(old_table,pre_pcode),brand_col]:
-        if sub < min_sub:
+            sub = abs(price_col_data[i]-price_line)
-            min_sub = sub
+            if sub < min_sub:
-            max_row = i
+                min_sub = sub
+                max_row = i
    fit_pcode = pcode_col_data[max_row]
    return fit_pcode
@@ -307,9 +327,9 @@ def data_add(table,pcode,col,pcode_col,brand_col,price_col,type_col,ser_col):
        此处添加价格判断函数，寻找所有品牌价格最相近行。
        '''
        if col == ser_col:
-            table=moudle10(table,pcode,ser_col,type_col)
+            table=moudle10(table,pcode,brand_col,pcode_col,ser_col,type_col)
        else:
-            match_max_pcode = moudle12(table,pcode,price_col,pcode_col)
+            match_max_pcode = moudle12(table,pcode,price_col,pcode_col,brand_col)
            table=moudle9(table,match_max_pcode,pcode,col)
        return table
    table_new = table_new.reset_index(drop=True)           #reset

--- a/lib_nonstand-stand_new.csv
+++ b/lib_nonstand-stand_new.csv
--- a/lib_nonstand-stand_price.xlsx
+++ b/lib_nonstand-stand_price.xlsx
--- a/main.py
+++ b/main.py
@@ -15,16 +15,16 @@ def main():
    table_path = ("lib_nonstand-stand_price.xlsx")
    out_path = ("lib_nonstand-stand_new.csv")
+    '''
    data_col=int('3')   #输入文本信息，可以为网页url，也可以为其他不需要预测的信息列。
    pcode_col=int("2")  #输入productcode列号或者sku列号。
    price_col=int("47")  #输入价格所在列号。
    brand_col=int("4")  #输入品牌所在列号
    type_col=int("38")   #输入型号所在列号
    ser_col=int("44")   #输入系列所在列号
+    '''
-    a = Data_add(table_path,out_path,pcode_col,brand_col,price_col,type_col,ser_col,data_col)
+    a = Data_add(table_path,out_path)
    a.data_add_main()
 if __name__ == "__main__":