Commit 9e886ce1 authored by Zhouxingyu

修复了一些特殊情况下的报错,以及去除了列号输入,直接从表中读取列名

parent 5d87ab32
......@@ -5,22 +5,34 @@ from functions import *
class Data_add():
def __init__(self,table_path,out_path,pcode_col,brand_col,price_col,type_col,ser_col,data_col):
def __init__(self,table_path,out_path):
'''
依次输入表格路径,输出路径,productcode,品牌,价格,型号,系列对应的列号,以及一个原始数据或者不需要预测数据的列号。
'''
self._table = data_load(table_path).reset_index(drop = True)
self._path_out = out_path
self._data_col = data_col
self._pcode_col = pcode_col
self._brand_col = brand_col
self._price_col = price_col
self._type_col = type_col
self._ser_col = ser_col
lyst = list(self._table.columns)
for i in range(len(lyst)):
if lyst[i] == 'productname':
self._data_col = i
if lyst[i] == 'productcode':
self._pcode_col = i
if lyst[i] == 'brandname':
self._brand_col = i
if lyst[i] == 'price':
self._price_col = i
if lyst[i] == '产品型号':
self._type_col = i
if lyst[i] == '产品系列':
self._ser_col = i
try:
lyst_test = [self._data_col,self._pcode_col,self._brand_col,self._price_col,self._type_col,self._ser_col]
except AttributeError:
print('没有找到指定的列,请修改data_add_class中的代码或者检查excel表格。')
#print(self._data_col,self._pcode_col,self._brand_col,self._price_col,self._type_col,self._ser_col)
def data_add_main(self):
print('数据装填完毕!')
some_lyst=[self._data_col,self._pcode_col,self._price_col,self._brand_col,self._type_col]
n = self._table.shape[1] #列
lyst=[]
......@@ -31,6 +43,11 @@ class Data_add():
else:
lyst_null=moudle11(self._table,i,self._pcode_col)
for j in range(len(lyst_null)):
table=data_add(self._table,lyst_null[j],i,self._pcode_col,self._brand_col,self._price_col,self._type_col,self._ser_col)
try:
table=data_add(self._table,lyst_null[j],i,self._pcode_col,self._brand_col,self._price_col,self._type_col,self._ser_col)
except AttributeError:
return 0
print('数据装填完毕!')
loading()
print("填写空缺值完毕!")
pre(table,self._path_out)
\ No newline at end of file
......@@ -2,6 +2,7 @@ import difflib
import re
import pandas as pd
import xlrd
import time
def str_split(string):
'''
......@@ -28,10 +29,14 @@ def string_similar(s1, s2):
return difflib.SequenceMatcher(None, s1, s2).quick_ratio()
def pcode2Line_num(table, pcode, col=None):
    '''
    Convert a productcode into the zero-based position of its row.

    table: DataFrame-like object; only `.columns` and `.iloc` are used.
    pcode: productcode value to look for.
    col:   optional positional index of the productcode column. When left
           as None, the column whose header equals 'productcode' is used.

    Returns the position of the first row whose value in that column
    equals pcode.

    Raises KeyError when col is None and no header equals 'productcode',
    and ValueError (from list.index) when pcode is not in the column.
    '''
    if col is None:
        # Keep the last matching header, which is what the plain header
        # scan selected when a name occurred more than once.
        matches = [i for i, name in enumerate(table.columns)
                   if name == 'productcode']
        if not matches:
            # Without this guard `col` stayed unbound and the lookup below
            # failed with an unrelated UnboundLocalError.
            raise KeyError("column 'productcode' not found in table")
        col = matches[-1]
    data_col = list(table.iloc[:, col])
    return data_col.index(pcode)
......@@ -55,10 +60,19 @@ def moudle9(table,mat_pcode,pre_pcode,col):
return table
def moudle10(table,pre_pcode,ser_col=15,type_col=9):
def moudle10(table,pre_pcode,brand_col,pcode_col,ser_col=15,type_col=9):
'''
ser_col为系列号,type_col为型号。函数将型号按照规律赋给系列号。
'''
brand = str(table.iloc[pcode2Line_num(table,pre_pcode),brand_col])
brand_lyst = list(table.iloc[:,brand_col])
same_brand_pcode_lyst = []
lyst = []
for i in range(len(brand_lyst)):
if brand_lyst[i] == brand:
same_brand_pcode_lyst.append(table.iloc[i,pcode_col])
lyst.append(i)
str2pre=str_split(str(table.iloc[pcode2Line_num(table,pre_pcode),type_col]))
str2out=[]
if is_alphabet(str2pre[0])==False:
......@@ -74,7 +88,8 @@ def moudle10(table,pre_pcode,ser_col=15,type_col=9):
str2out.append(str2pre[i])
else: break
str2out=''.join(str2out)
table.iloc[pcode2Line_num(table,pre_pcode),ser_col]=str2out
for i in range(len(same_brand_pcode_lyst)):
table.iloc[pcode2Line_num(table,same_brand_pcode_lyst[i]),ser_col]=str2out
return table
......@@ -222,6 +237,9 @@ def moudle11(table,col,pcode_col):
lyst.append(pcode_lyst[i])
return lyst
def loading(delay=15):
    '''
    Print the "filling in predicted data" notice, then pause.

    delay: seconds to block after printing. Defaults to 15, the pause that
           used to be hard-coded; pass 0 to skip the wait. Negative values
           are rejected by time.sleep with ValueError.
    '''
    print('正在填写预测数据。。。。')
    time.sleep(delay)
def data_load(path):
data = xlrd.open_workbook(path)
......@@ -255,12 +273,13 @@ def xlsx_to_csv_pd(csv_path,xlsx_path):
data_xls.to_csv(xlsx_path, encoding='utf-8')
def moudle12(old_table,pre_pcode,price_col,pcode_col):
def moudle12(old_table,pre_pcode,price_col,pcode_col,brand_col):
'''
在整个品牌都缺失某项参数,将价格最相近的产品的参数赋予这个空值。
'''
price_col_data = list(old_table.iloc[:, price_col])
price_col_data = list(map(lambda x:float(x), price_col_data)) #字符串转化为浮点型
brand_col_data = list(old_table.iloc[:, brand_col])
pcode_col_data = list(old_table.iloc[:, pcode_col])
......@@ -269,10 +288,11 @@ def moudle12(old_table,pre_pcode,price_col,pcode_col):
min_sub=abs(price_col_data[max_row]-price_line)
for i in range(1,len(price_col_data)):
sub = abs(price_col_data[i]-price_line)
if sub < min_sub:
min_sub = sub
max_row = i
if brand_col_data[i] != old_table.iloc[pcode2Line_num(old_table,pre_pcode),brand_col]:
sub = abs(price_col_data[i]-price_line)
if sub < min_sub:
min_sub = sub
max_row = i
fit_pcode = pcode_col_data[max_row]
return fit_pcode
......@@ -307,9 +327,9 @@ def data_add(table,pcode,col,pcode_col,brand_col,price_col,type_col,ser_col):
此处添加价格判断函数,寻找所有品牌价格最相近行。
'''
if col == ser_col:
table=moudle10(table,pcode,ser_col,type_col)
table=moudle10(table,pcode,brand_col,pcode_col,ser_col,type_col)
else:
match_max_pcode = moudle12(table,pcode,price_col,pcode_col)
match_max_pcode = moudle12(table,pcode,price_col,pcode_col,brand_col)
table=moudle9(table,match_max_pcode,pcode,col)
return table
table_new = table_new.reset_index(drop=True) #reset
......
This diff is collapsed.
......@@ -15,16 +15,16 @@ def main():
table_path = ("lib_nonstand-stand_price.xlsx")
out_path = ("lib_nonstand-stand_new.csv")
'''
data_col=int('3') #输入文本信息,可以为网页url,也可以为其他不需要预测的信息列。
pcode_col=int("2") #输入productcode列号或者sku列号。
price_col=int("47") #输入价格所在列号。
brand_col=int("4") #输入品牌所在列号
type_col=int("38") #输入型号所在列号
ser_col=int("44") #输入系列所在列号
'''
a = Data_add(table_path,out_path,pcode_col,brand_col,price_col,type_col,ser_col,data_col)
a = Data_add(table_path,out_path)
a.data_add_main()
if __name__ == "__main__":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment