Commit 4a77589c authored by huangiyu's avatar huangiyu

1

parent ca02d78d
import json
import logging
import os
import re

import pymysql
import requests
# Connection settings for the zdzs_proxy MySQL database.
# Each setting can be overridden through the environment variable named in the
# call; the literal is only a fallback so existing runs keep working unchanged.
# SECURITY NOTE(review): the fallback credentials are committed in source.
# They should be rotated and supplied exclusively through the environment.
mysql_zdzs_proxy_host = os.environ.get('MYSQL_ZDZS_PROXY_HOST', '59.110.219.171')
mysql_zdzs_proxy_user = os.environ.get('MYSQL_ZDZS_PROXY_USER', 'zgcindex')
mysql_zdzs_proxy_password = os.environ.get('MYSQL_ZDZS_PROXY_PASSWORD', 'zgcprice2019')
mysql_zdzs_proxy_database = os.environ.get('MYSQL_ZDZS_PROXY_DATABASE', 'zdzs_proxy')

# Module-wide connection: rows come back as dicts (DictCursor) and every
# statement is committed immediately (autocommit=True).
conn = pymysql.connect(
    host=mysql_zdzs_proxy_host,
    user=mysql_zdzs_proxy_user,
    password=mysql_zdzs_proxy_password,
    database=mysql_zdzs_proxy_database,
    charset="utf8mb4",
    cursorclass=pymysql.cursors.DictCursor,
    autocommit=True,
)

# Shared cursor kept at module level for code that uses it directly.
cur = conn.cursor()
_logger = logging.getLogger(__name__)


def mysql_zdzs_proxy(sql=None, args=None):
    """Execute one SQL statement on the module-level connection.

    Args:
        sql: The statement to execute. It is echoed to stdout before running.
        args: Optional parameters (sequence or mapping) that the driver binds
            to the placeholders in ``sql``. Leave as ``None`` when ``sql`` is
            already a complete statement.

    Returns:
        The rows returned by ``fetchall()``. An empty list is returned when
        ``sql`` is empty or when execution fails; failures are logged rather
        than raised, so a single bad statement does not abort the caller.
    """
    print(sql)
    if not sql:
        # Nothing to run; executing None would only raise inside the driver.
        _logger.warning("mysql_zdzs_proxy called without a SQL statement")
        return []
    try:
        # A per-call cursor is closed automatically when the block exits.
        with conn.cursor() as cursor:
            cursor.execute(sql, args)
            return cursor.fetchall()
    except pymysql.err.IntegrityError:
        # Constraint violation reported by the server (e.g. a duplicate key).
        _logger.warning("Integrity error while executing: %s", sql, exc_info=True)
        return []
    except Exception:
        # Deliberate best-effort: report the failure and keep the caller going.
        _logger.exception("Failed to execute: %s", sql)
        return []
from urllib.parse import quote
# Matches one HTML tag: "<", at least one character other than ">", then ">".
_HTML_TAG_PATTERN = re.compile(r'<[^>]+>')


def remove_html_tags(text):
    """Return ``text`` with every HTML tag removed.

    Only the tags themselves are dropped; the text between them is kept.
    """
    return _HTML_TAG_PATTERN.sub('', text)
# ---------------------------------------------------------------------------
# JD search-ad scraper: for every pending search URL, call the JD ad-search
# API and store the returned ad SKUs as one JSON document in DMP_SEARCH_DATA.
# ---------------------------------------------------------------------------

# Seconds to wait for the JD API. Without a timeout a stalled connection would
# block the whole batch indefinitely.
JD_REQUEST_TIMEOUT = 30

# Key under which the API response lists the ad SKUs
# (presumably the slot requested through "ad_ids":"291:19" in the request body).
JD_AD_SLOT_KEY = '291'

# Request headers are identical for every call, so they are built once.
# SECURITY NOTE(review): the Cookie below looks like a captured logged-in
# browser session. It is committed in source and will stop working once the
# session expires; it should be supplied from outside the code.
headers = {
    'origin': 'https://search.jd.com',
    'Cookie': (
        'shshshfpa=302d4458-a337-1470-3d87-19967a3d383c-1664177595; __jdu=1664177649338438720918; pinId=m_q_aGs3ba1NmxbgWiZkb7V9-x-f3wj7; jcap_dvzw_fp=6hXnN9RzY6q93f9CIqpUUYVuWPqw8tbJ848WPVvfBOjJmE9TIGJb1Yf5gt4E-YG79w4dtC-1GyVyjEGpw5-w2w; shshshfpx=302d4458-a337-1470-3d87-19967a3d383c-1664177595; shshshfp=1f6ff101d0ef7e3e106d3b6137763832; pin=jd_54fd27b0d2b22; unick=huangziyu0726; _tp=j3ClGrQ8eyicnottNC9z45B9wgXg9sbTugOWYIqmeB0%3D; _pst=jd_54fd27b0d2b22; autoOpenApp_downCloseDate_auto=1705733792609_1800000; autoOpenApp_downCloseDate_autoOpenApp_autoPromptly=1706681658897_1; _gcl_au=1.1.1699672507.1709478014; mba_muid=1664177649338438720918; user-key=88908ac9-9cfd-4652-9b2f-f1b9ecad98ea; unpl=JF8EALJnNSttDBhXBB0KSBRDHFsDWw8MQx8Dbm8EA15dHlYDHlAdFRJ7XlVdWBRKFR9uYxRXX1NOVw4aASsSEXteU11bD00VB2xXVgQFDQ8WUUtBSUt-S1tUWFkNTRYAbGUEZG1bS2QFGjIbFRdOXVJYWg1DFQFmYgddW1tKXAUSMhoiF3ttZFpVAU0QAl9mNVVtGh8IABgEHxMXBl1TWVgITREEam8HVlRdSV0DGAMTEhl7XGRd; __jdv=229668127|baidu-search|t_262767352_baidusearch|cpc|171754702230_0_eb2079b7bf7f4c5991090f24d374c763|1709653324334; mt_xid=V2_52007VwMVVVhYVFwYTBFeB24GEFtbW1NSH0ApWFA1VBZSCV1OCUtBHUAAMAFCTlQLU1wDThBcVWYGQQBbCwVbL0oYXwB7AhBOXF9DWx5CGlkOZwMiUG1bYlkeTxFZAFcAFVJb; areaId=1; ipLoc-djd=1-2800-55811-0; RT="z; token=0331448fb40d80da734aceff7d48bafa,3,950263; __tk=uzXDYz2FXUJ0uzu0XsqFZsvTvsk0XpbTvSnoXzJ5uzX,3,950263; 3AB9D23F7A4B3CSS=jdd036KK7GVN2A5VXFOZVHAPY25WX7HXJAN62PDVL6JU6EOWVUQJMGEAWKRGEDHIE367RY5TVUPVMRDVFP3CN276H7UOWKYAAAAMOIA6VIPAAAAAACT7MJYE5DSZYVQX; _gia_d=1; jsavif=1; wlfstk_smdl=rtj4txsjw59ztpv9fsn6yxbvt6ty3wiu; TrackID=1C2yN2jHh6Wg-NhNJLzHHqcnZDWxML5PG3tsIgqmxor1OVm1c_lEWSZjqo9H4QEOUaP-l0Y7EKLOnfjHC2RU7Zdj3DG6mRzJkXqQdNCcnpYM; '
        'thor=237C3D3A4A70C62901A697588128F575FE0CEF91A4213E671F253B27AF7FB601536685D63E703D077E709159409EBEC0A3B565591643E47E6D1B0A57C5DFFA4F2A9ECC9ADFD979740CDD4D1842A5CCF28165BD1E1640E4BEA87287FF3023299B2A43DCC6FB3EAA976E79787E1A14A8622CA12CA512495BF85C7F69E30C2E85066B6369585D02B7A58F37806B5C4A07D5DD8E07D1536CF0A7BC676D403012149B; flash=2_qECj__VcDB1S4gEaqS6lrKKvZR4TCQeCSlLdk3QgDH49Q00GmtDvwTO4fhRVPGj_EQMRN_rL8wOk4cX0er-Ohvzpua_6-Cmq4DDcx0Kw5ek*; ceshi3.com=000; mba_sid=17104747515401167401247798463.1; __jd_ref_cls=LoginDisposition_Go; x-rp-evtoken=N-nAb5Oj6OS1u8hkvixIgNLm8epG1xAIhLtiT01VwscTFkwuzJmUObwZwoww3HGIwRv7KyWgxiTC2ghErjsmtxK1DyAkyLbcd5ne9cxLAiPxp1q6nvjZScJDfpNzXE1k5ihL2oqXaUM_PwNW2jZaINB6AUpZgSu1KOrAs0nlGROPmgZMyeVGQG3w1hLHt5rvi3Ylmy-qdixDl4Ds4t92oegs-EbHvqtsTNHNnJcMjG0%3D; __jda=143920055.1664177649338438720918.1664177649.1710402971.1710474746.407; __jdc=143920055; shshshfpb=BApXeUi81Q-tA2-pIUXD6xI9OcOHB-4nmByZWcrpX9xJ1MsQPq4O2; 3AB9D23F7A4B3C9B=6KK7GVN2A5VXFOZVHAPY25WX7HXJAN62PDVL6JU6EOWVUQJMGEAWKRGEDHIE367RY5TVUPVMRDVFP3CN276H7UOWKY; __jdb=143920055.6.1664177649338438720918|407.1710474746'
    ),
    'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
    'Accept': '*/*',
    'Host': 'api.m.jd.com',
    'Connection': 'keep-alive',
}


def _build_api_url(search_url):
    """Turn a stored search-page URL into the matching JD ad-search API URL."""
    # The stored URL is the fixed search-page prefix and suffix around the keyword.
    keyword = search_url.replace('https://search.jd.com/search?keyword=', "").replace('&enc=utf-8', "")
    print(keyword)
    # Percent-encode the keyword because it is embedded in the URL-encoded "body" parameter.
    keyword = quote(keyword)
    return f"https://api.m.jd.com/api?functionId=pc_search_adv_Search&appid=search-pc-java&client=pc&clientVersion=1.0.0&uuid=143920055.1664177649338438720918.1664177649.1710402971.1710474746.407&loginType=3&t=1710474777240&body=%7B%22area%22:%221%22,%22enc%22:%22utf-8%22,%22keyword%22:%22{keyword}%22,%22adType%22:7,%22page%22:%221%22,%22ad_ids%22:%22291:19%22,%22xtest%22:%22new_search%22%7D&x-api-eid-token=jdd036KK7GVN2A5VXFOZVHAPY25WX7HXJAN62PDVL6JU6EOWVUQJMGEAWKRGEDHIE367RY5TVUPVMRDVFP3CN276H7UOWKYAAAAMOIA6VIPAAAAAACT7MJYE5DSZYVQX"


def _parse_ad_items(sku_list):
    """Map the raw ad entries of one API response to the stored record layout."""
    return [
        {
            'is_self_operate_div': '',
            'item_url': 'https://item.jd.com/' + str(sku_info['sku_id']) + '.html',
            'sku_id': str(sku_info['sku_id']),
            'shop_name': sku_info['shop_link']['shop_name'],
            'sku_name': remove_html_tags(sku_info['ad_title']),
            'sku_price': sku_info['sku_price'],
        }
        for sku_info in sku_list
    ]


# Pending work: spider rows that have no entry in DMP_SEARCH_DATA yet.
url_data = mysql_zdzs_proxy(
    "SELECT id,url,mallId FROM DMP_SEARCH_SPIDER WHERE ID NOT IN (SELECT ID FROM DMP_SEARCH_DATA) and mallId = 'DS-JD' and project_id = '110' and (id like '%0' or id like '%1' or id like '%2' or id like '%3' ) limit 10000")

for row in url_data:
    url = _build_api_url(row['url'])

    # A skipped row is not inserted, so the SELECT above picks it up again on
    # the next run.
    try:
        response = requests.get(url, headers=headers, timeout=JD_REQUEST_TIMEOUT)
        response_json = json.loads(response.text)
    except (requests.RequestException, ValueError) as e:
        # Network failure, timeout, or a body that is not JSON.
        print(f"skip id={row['id']}: request or JSON decoding failed: {e}")
        continue
    print(response_json)

    # An empty or unexpected reply carries no ad list; do not store anything.
    if not isinstance(response_json, dict) or JD_AD_SLOT_KEY not in response_json:
        print(f"skip id={row['id']}: response has no '{JD_AD_SLOT_KEY}' entry")
        continue

    try:
        dataList = _parse_ad_items(response_json[JD_AD_SLOT_KEY])
    except (KeyError, TypeError) as e:
        # An ad entry lacks an expected field or has an unexpected shape.
        print(f"skip id={row['id']}: unexpected ad item layout: {e!r}")
        continue

    # Let the driver quote the values. Doubling single quotes by hand leaves
    # the backslashes produced by json.dumps (e.g. \") to be interpreted by
    # MySQL, which corrupts the stored JSON.
    insert_sql = cur.mogrify(
        "INSERT INTO DMP_SEARCH_DATA(id,result,mallId) VALUES (%s,%s,%s)",
        (row['id'], json.dumps(dataList, ensure_ascii=False), row['mallId']),
    )
    mysql_zdzs_proxy(insert_sql)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment