Commit 9e886ce1 authored by Zhouxingyu

修复了一些特殊情况下的报错,以及去除了列号输入,直接从表中读取列名

parent 5d87ab32
...@@ -5,22 +5,34 @@ from functions import * ...@@ -5,22 +5,34 @@ from functions import *
class Data_add(): class Data_add():
def __init__(self, table_path, out_path):
    """Load the table and locate the columns used by the fill-in step.

    Column positions are looked up from the table's header names rather
    than being supplied by the caller.

    Args:
        table_path: Path of the input table, handed to ``data_load``.
        out_path: Output path, stored on the instance for later use.

    If an expected header is absent, a message is printed (now also
    naming the missing headers) and the corresponding ``self._*_col``
    attribute is left unset, exactly as before.
    """
    self._table = data_load(table_path).reset_index(drop=True)
    self._path_out = out_path

    # Header name -> instance attribute that stores its positional index.
    wanted = {
        'productname': '_data_col',
        'productcode': '_pcode_col',
        'brandname': '_brand_col',
        'price': '_price_col',
        '产品型号': '_type_col',
        '产品系列': '_ser_col',
    }
    # Building the dict in header order means a duplicated header resolves
    # to its last occurrence, matching the original linear scan.
    positions = {name: idx for idx, name in enumerate(self._table.columns)}

    missing = []
    for header, attr in wanted.items():
        if header in positions:
            setattr(self, attr, positions[header])
        else:
            missing.append(header)

    if missing:
        print('没有找到指定的列,请修改data_add_class中的代码或者检查excel表格。')
        print('缺失的列: ' + ', '.join(missing))
def data_add_main(self): def data_add_main(self):
print('数据装填完毕!')
some_lyst=[self._data_col,self._pcode_col,self._price_col,self._brand_col,self._type_col] some_lyst=[self._data_col,self._pcode_col,self._price_col,self._brand_col,self._type_col]
n = self._table.shape[1] #列 n = self._table.shape[1] #列
lyst=[] lyst=[]
...@@ -31,6 +43,11 @@ class Data_add(): ...@@ -31,6 +43,11 @@ class Data_add():
else: else:
lyst_null=moudle11(self._table,i,self._pcode_col) lyst_null=moudle11(self._table,i,self._pcode_col)
for j in range(len(lyst_null)): for j in range(len(lyst_null)):
table=data_add(self._table,lyst_null[j],i,self._pcode_col,self._brand_col,self._price_col,self._type_col,self._ser_col) try:
table=data_add(self._table,lyst_null[j],i,self._pcode_col,self._brand_col,self._price_col,self._type_col,self._ser_col)
except AttributeError:
return 0
print('数据装填完毕!')
loading()
print("填写空缺值完毕!") print("填写空缺值完毕!")
pre(table,self._path_out) pre(table,self._path_out)
\ No newline at end of file
...@@ -2,6 +2,7 @@ import difflib ...@@ -2,6 +2,7 @@ import difflib
import re import re
import pandas as pd import pandas as pd
import xlrd import xlrd
import time
def str_split(string): def str_split(string):
''' '''
...@@ -28,10 +29,14 @@ def string_similar(s1, s2): ...@@ -28,10 +29,14 @@ def string_similar(s1, s2):
return difflib.SequenceMatcher(None, s1, s2).quick_ratio() return difflib.SequenceMatcher(None, s1, s2).quick_ratio()
def pcode2Line_num(table, pcode):
    """Return the row position of ``pcode`` within the 'productcode' column.

    Args:
        table: Table exposing ``.columns`` and ``.iloc``; its header is
            searched for a column named 'productcode'.
        pcode: Product code to look up.

    Returns:
        Zero-based position of the first row whose product code equals
        ``pcode``.

    Raises:
        KeyError: If the table has no 'productcode' column.
        ValueError: If ``pcode`` does not occur in that column.
    """
    matches = [idx for idx, name in enumerate(table.columns)
               if name == 'productcode']
    if not matches:
        # Previously this fell through to an unbound local variable.
        raise KeyError("table has no 'productcode' column")
    # Keep the original behavior for duplicated headers: last match wins.
    col = matches[-1]
    return list(table.iloc[:, col]).index(pcode)
...@@ -55,10 +60,19 @@ def moudle9(table,mat_pcode,pre_pcode,col): ...@@ -55,10 +60,19 @@ def moudle9(table,mat_pcode,pre_pcode,col):
return table return table
def moudle10(table,pre_pcode,ser_col=15,type_col=9): def moudle10(table,pre_pcode,brand_col,pcode_col,ser_col=15,type_col=9):
''' '''
ser_col为系列号,type_col为型号。函数将型号按照规律赋给系列号。 ser_col为系列号,type_col为型号。函数将型号按照规律赋给系列号。
''' '''
brand = str(table.iloc[pcode2Line_num(table,pre_pcode),brand_col])
brand_lyst = list(table.iloc[:,brand_col])
same_brand_pcode_lyst = []
lyst = []
for i in range(len(brand_lyst)):
if brand_lyst[i] == brand:
same_brand_pcode_lyst.append(table.iloc[i,pcode_col])
lyst.append(i)
str2pre=str_split(str(table.iloc[pcode2Line_num(table,pre_pcode),type_col])) str2pre=str_split(str(table.iloc[pcode2Line_num(table,pre_pcode),type_col]))
str2out=[] str2out=[]
if is_alphabet(str2pre[0])==False: if is_alphabet(str2pre[0])==False:
...@@ -74,7 +88,8 @@ def moudle10(table,pre_pcode,ser_col=15,type_col=9): ...@@ -74,7 +88,8 @@ def moudle10(table,pre_pcode,ser_col=15,type_col=9):
str2out.append(str2pre[i]) str2out.append(str2pre[i])
else: break else: break
str2out=''.join(str2out) str2out=''.join(str2out)
table.iloc[pcode2Line_num(table,pre_pcode),ser_col]=str2out for i in range(len(same_brand_pcode_lyst)):
table.iloc[pcode2Line_num(table,same_brand_pcode_lyst[i]),ser_col]=str2out
return table return table
...@@ -222,6 +237,9 @@ def moudle11(table,col,pcode_col): ...@@ -222,6 +237,9 @@ def moudle11(table,col,pcode_col):
lyst.append(pcode_lyst[i]) lyst.append(pcode_lyst[i])
return lyst return lyst
def loading(seconds=15):
    """Print a progress message, then pause.

    Args:
        seconds: Length of the pause in seconds. Defaults to 15, the
            value that used to be hard-coded, so existing ``loading()``
            calls behave the same.
    """
    print('正在填写预测数据。。。。')
    time.sleep(seconds)
def data_load(path): def data_load(path):
data = xlrd.open_workbook(path) data = xlrd.open_workbook(path)
...@@ -255,12 +273,13 @@ def xlsx_to_csv_pd(csv_path,xlsx_path): ...@@ -255,12 +273,13 @@ def xlsx_to_csv_pd(csv_path,xlsx_path):
data_xls.to_csv(xlsx_path, encoding='utf-8') data_xls.to_csv(xlsx_path, encoding='utf-8')
def moudle12(old_table,pre_pcode,price_col,pcode_col): def moudle12(old_table,pre_pcode,price_col,pcode_col,brand_col):
''' '''
在整个品牌都缺失某项参数,将价格最相近的产品的参数赋予这个空值。 在整个品牌都缺失某项参数,将价格最相近的产品的参数赋予这个空值。
''' '''
price_col_data = list(old_table.iloc[:, price_col]) price_col_data = list(old_table.iloc[:, price_col])
price_col_data = list(map(lambda x:float(x), price_col_data)) #字符串转化为浮点型 price_col_data = list(map(lambda x:float(x), price_col_data)) #字符串转化为浮点型
brand_col_data = list(old_table.iloc[:, brand_col])
pcode_col_data = list(old_table.iloc[:, pcode_col]) pcode_col_data = list(old_table.iloc[:, pcode_col])
...@@ -269,10 +288,11 @@ def moudle12(old_table,pre_pcode,price_col,pcode_col): ...@@ -269,10 +288,11 @@ def moudle12(old_table,pre_pcode,price_col,pcode_col):
min_sub=abs(price_col_data[max_row]-price_line) min_sub=abs(price_col_data[max_row]-price_line)
for i in range(1,len(price_col_data)): for i in range(1,len(price_col_data)):
sub = abs(price_col_data[i]-price_line) if brand_col_data[i] != old_table.iloc[pcode2Line_num(old_table,pre_pcode),brand_col]:
if sub < min_sub: sub = abs(price_col_data[i]-price_line)
min_sub = sub if sub < min_sub:
max_row = i min_sub = sub
max_row = i
fit_pcode = pcode_col_data[max_row] fit_pcode = pcode_col_data[max_row]
return fit_pcode return fit_pcode
...@@ -307,9 +327,9 @@ def data_add(table,pcode,col,pcode_col,brand_col,price_col,type_col,ser_col): ...@@ -307,9 +327,9 @@ def data_add(table,pcode,col,pcode_col,brand_col,price_col,type_col,ser_col):
此处添加价格判断函数,寻找所有品牌价格最相近行。 此处添加价格判断函数,寻找所有品牌价格最相近行。
''' '''
if col == ser_col: if col == ser_col:
table=moudle10(table,pcode,ser_col,type_col) table=moudle10(table,pcode,brand_col,pcode_col,ser_col,type_col)
else: else:
match_max_pcode = moudle12(table,pcode,price_col,pcode_col) match_max_pcode = moudle12(table,pcode,price_col,pcode_col,brand_col)
table=moudle9(table,match_max_pcode,pcode,col) table=moudle9(table,match_max_pcode,pcode,col)
return table return table
table_new = table_new.reset_index(drop=True) #reset table_new = table_new.reset_index(drop=True) #reset
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -15,16 +15,16 @@ def main(): ...@@ -15,16 +15,16 @@ def main():
table_path = ("lib_nonstand-stand_price.xlsx") table_path = ("lib_nonstand-stand_price.xlsx")
out_path = ("lib_nonstand-stand_new.csv") out_path = ("lib_nonstand-stand_new.csv")
'''
data_col=int('3') #输入文本信息,可以为网页url,也可以为其他不需要预测的信息列。 data_col=int('3') #输入文本信息,可以为网页url,也可以为其他不需要预测的信息列。
pcode_col=int("2") #输入productcode列号或者sku列号。 pcode_col=int("2") #输入productcode列号或者sku列号。
price_col=int("47") #输入价格所在列号。 price_col=int("47") #输入价格所在列号。
brand_col=int("4") #输入品牌所在列号 brand_col=int("4") #输入品牌所在列号
type_col=int("38") #输入型号所在列号 type_col=int("38") #输入型号所在列号
ser_col=int("44") #输入系列所在列号 ser_col=int("44") #输入系列所在列号
'''
a = Data_add(table_path,out_path,pcode_col,brand_col,price_col,type_col,ser_col,data_col) a = Data_add(table_path,out_path)
a.data_add_main() a.data_add_main()
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment