1688 作为 B2B 电商核心平台,其商品详情包含批发价、起订量、供应商资质等 B 端特色数据,接口架构与 C 端平台差异显著。本文聚焦 1688 商品详情接口的技术实现,重点解决平台加密参数破解、多接口数据协同、供应商信息提取等核心问题,提供一套合规、可落地的 B 端电商数据采集方案,严格遵循平台规则与数据安全规范。
一、1688 详情接口架构与合规要点
1688 商品详情数据分散在基础信息接口、价格库存接口、供应商接口和规格参数接口中,需多接口协同获取。实现前需明确以下合规边界,确保通过 CSDN 审核且符合平台规则:
数据范围合规:仅采集公开的商品信息(批发价、起订量、规格等),不涉及平台私有 API 或用户交易数据;
请求行为合规:单 IP 请求间隔不低于 20 秒,单商品详情采集流程(含多接口)总耗时控制在 60 秒以上;
使用场景合规:数据仅用于市场调研、供应链分析等合法场景,不得用于恶意比价、商业竞争;
协议遵循:严格遵守 1688 robots.txt 协议,不爬取 disallow 标记的路径(如 /trade/ 交易相关页面)。
核心技术流程如下:
商品ID解析 → 多接口参数生成 → 分布式请求调度 → 数据清洗与融合 → 结构化存储

点击获取 key 和 secret
二、核心技术实现:多接口协同采集与解析
1. 1688 商品 ID 解析器(适配 B 端 URL 特色)
1688 商品 URL 格式多样(含 PC 端、移动端、短链等),需针对性解析商品 ID(offerId):
运行
import re
import requests
from lxml import etree
class AlibabaOfferIdParser:
    """Resolve the 1688 offer ID (offerId) of a product.

    The ID is first looked up in the URL text itself; if that fails the page
    is downloaded and the ID is searched for in the final (post-redirect)
    URL, in an ``offerId`` meta tag, and in inline scripts.
    """

    # Ordered URL patterns; each one captures the numeric offerId in group 1.
    _URL_PATTERNS = (
        # PC detail page: https://detail.1688.com/offer/1234567890.html
        re.compile(r"offer/(\d+)\.html"),
        # Mobile page: https://m.1688.com/offer/1234567890.html
        re.compile(r"m\.1688\.com/offer/(\d+)\.html"),
        # Query-string form: .../offer_view.htm?offerId=1234567890
        re.compile(r"offerId=(\d+)"),
    )

    # Matches `offerId: 123`, `offerId = "123"` and JSON-style `"offerId":"123"`.
    _SCRIPT_PATTERN = re.compile(r"""offerId["']?\s*[:=]\s*["']?(\d+)""")

    def __init__(self):
        # Browser-like headers sent when the product page has to be fetched.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Referer": "https://www.1688.com/"
        }

    def parse_from_url(self, product_url):
        """Extract the offerId directly from the URL text.

        Args:
            product_url: Product URL in any of the supported formats.

        Returns:
            The offerId as a string of digits, or None when the URL is empty
            or matches none of the known patterns.
        """
        if not product_url:
            return None
        for pattern in self._URL_PATTERNS:
            match = pattern.search(product_url)
            if match:
                return match.group(1)
        return None

    def parse_from_page(self, product_url):
        """Fetch the page and extract the offerId from its content.

        Used as a fallback when the URL itself does not contain the ID.

        Args:
            product_url: URL of the product page to download.

        Returns:
            The offerId string, or None when it cannot be found or the
            request/parsing fails (the failure is printed, not raised).
        """
        try:
            response = requests.get(
                product_url,
                headers=self.headers,
                timeout=15,
                allow_redirects=True
            )
            response.encoding = "utf-8"

            # Redirects were followed, so the final URL may now carry the ID
            # even though the URL we were given did not.
            redirected_id = self.parse_from_url(response.url)
            if redirected_id:
                return redirected_id

            tree = etree.HTML(response.text)
            if tree is None:
                # lxml yields no tree for an empty/unparseable document.
                return None

            # 1) Dedicated meta tag.
            offer_id_meta = tree.xpath('//meta[@name="offerId"]/@content')
            if offer_id_meta:
                return offer_id_meta[0]

            # 2) Inline scripts that mention offerId.
            script_tags = tree.xpath('//script[contains(text(), "offerId")]/text()')
            for script in script_tags:
                match = self._SCRIPT_PATTERN.search(script)
                if match:
                    return match.group(1)
            return None
        except Exception as e:
            # Deliberately best-effort: any network or parsing failure simply
            # means "no ID found" for the caller.
            print(f"页面提取offerId失败: {str(e)}")
            return None

    def get_offer_id(self, product_url):
        """Return the offerId, trying the URL first and the page second.

        Args:
            product_url: Product URL in any supported format.

        Returns:
            The offerId string, or None when both strategies fail.
        """
        offer_id = self.parse_from_url(product_url)
        if offer_id:
            return offer_id
        return self.parse_from_page(product_url)
2. 多接口参数生成器(适配 1688 加密规则)
1688 详情接口需动态生成 sign、timestamp 等加密参数,不同接口参数规则差异较大,需针对性处理:
python
运行
import time
import random
import hashlib
import json
class AlibabaParamsGenerator:
    """Build signed query parameters for the 1688 detail interfaces.

    Every ``generate_*_params`` method returns the common base parameters,
    the interface-specific ``method``/ID/``fields`` entries, and a ``sign``
    computed over all of them.
    """

    def __init__(self, app_key="12574478",
                 secret="6383d13959f142e59ac4a3d938826101", platform="pc"):
        """Store the credentials used for signing.

        Args:
            app_key: Application key sent as ``appKey``.
            secret: Signing secret appended to the signature base string.
                The default is only a placeholder; a real secret must be
                supplied by the caller.
            platform: Platform tag kept on the instance.
        """
        self.app_key = app_key
        self.secret = secret
        self.platform = platform

    def generate_base_params(self):
        """Return the parameters shared by every interface call.

        Returns:
            A new dict; ``timestamp`` is the current time in milliseconds,
            as a string.
        """
        return {
            "appKey": self.app_key,
            "timestamp": str(int(time.time() * 1000)),
            "format": "json",
            "v": "2.0",
            "signMethod": "md5",
            "partnerId": "apidoc",
            "session": ""  # left empty when no login is required
        }

    def generate_sign(self, params):
        """Compute the upper-case MD5 signature for ``params``.

        The base string is every ``key`` + ``value`` pair concatenated in
        ascending key order, followed by the secret. An existing ``sign``
        entry is skipped so that re-signing a signed dict is idempotent.

        Args:
            params: Mapping of parameter names to values.

        Returns:
            A 32-character upper-case hexadecimal digest.
        """
        signable = sorted(
            ((k, v) for k, v in params.items() if k != "sign"),
            key=lambda item: item[0],
        )
        sign_str = "".join(f"{k}{v}" for k, v in signable) + self.secret
        return hashlib.md5(sign_str.encode("utf-8")).hexdigest().upper()

    def _build_params(self, method, id_field, id_value, fields):
        """Assemble base params + interface params and attach the signature."""
        params = self.generate_base_params()
        params.update({
            "method": method,
            id_field: id_value,
            "fields": fields
        })
        params["sign"] = self.generate_sign(params)
        return params

    def generate_basic_params(self, offer_id):
        """Return signed parameters for the basic-info interface.

        Args:
            offer_id: The product's offerId.
        """
        return self._build_params(
            "alibaba.offer.get",
            "offerId",
            offer_id,
            "offerId,title,picUrl,detailUrl,cateId,cateName",
        )

    def generate_price_params(self, offer_id):
        """Return signed parameters for the price / minimum-order interface.

        Args:
            offer_id: The product's offerId.
        """
        return self._build_params(
            "alibaba.offer.price.get",
            "offerId",
            offer_id,
            "priceRange,moq,unit,priceType,promotionPrice",
        )

    def generate_supplier_params(self, supplier_id):
        """Return signed parameters for the supplier-info interface.

        Args:
            supplier_id: The supplier's memberId.
        """
        return self._build_params(
            "alibaba.member.get",
            "memberId",
            supplier_id,
            "memberId,companyName,mainProduct,creditLevel,startYear",
        )

    def generate_spec_params(self, offer_id):
        """Return signed parameters for the specification / SKU interface.

        Args:
            offer_id: The product's offerId.
        """
        return self._build_params(
            "alibaba.offer.spec.get",
            "offerId",
            offer_id,
            "specId,specName,specValues,skuList",
        )
3. 分布式请求调度器(应对 B 端反爬)
1688 对 B 端数据接口反爬严格,需实现代理池轮换、请求间隔动态调整、会话保持等策略:
python
运行
import time
import random
import requests
from fake_useragent import UserAgent
from concurrent.futures import ThreadPoolExecutor, as_completed
class AlibabaRequestScheduler:
    """Send the per-interface requests for one product.

    Each request is preceded by a randomized delay, goes through a randomly
    chosen session from a small pool and, when a proxy pool was supplied,
    through a randomly chosen proxy that answered a probe request.
    """

    # (low, high) delay in seconds for each known interface type.
    _INTERVAL_RANGES = {
        "basic": (20, 25),
        "price": (25, 30),
        "supplier": (25, 30),
        "spec": (22, 27),
    }
    # Fixed delay in seconds for interface types not listed above.
    _DEFAULT_INTERVAL = 25

    # Substrings whose presence in a response body is treated as a block page.
    _BLOCKED_INDICATORS = (
        "请输入验证码",
        "访问频率过高",
        "系统繁忙",
        "403 Forbidden",
        "login required",
    )

    def __init__(self, proxy_pool=None, max_workers=3):
        """Create the scheduler and warm up its session pool.

        Args:
            proxy_pool: Optional list of proxy URLs. The list is used as-is
                and proxies that fail the probe are removed from it in place.
            max_workers: Number of worker threads and of pooled sessions.
        """
        self.api_domain = "https://gw.open.1688.com/openapi/api"
        self.proxy_pool = proxy_pool or []
        self.ua = UserAgent()
        self.max_workers = max_workers
        self.session_pool = self._init_session_pool()

    def _init_session_pool(self):
        """Build ``max_workers`` sessions, each with its own cookie jar.

        Returns:
            List of ``requests.Session`` objects.
        """
        session_pool = []
        for _ in range(self.max_workers):
            session = requests.Session()
            session.headers.update({
                "User-Agent": self.ua.random,
                "Accept": "application/json,text/plain,*/*",
                "Referer": "https://www.1688.com/",
                "Origin": "https://www.1688.com"
            })
            # Visit the home page once so the session picks up base cookies.
            # The warm-up is best-effort: a failure here must not prevent the
            # scheduler from being constructed.
            try:
                session.get("https://www.1688.com", timeout=10)
            except requests.RequestException as e:
                print(f"会话预热失败: {str(e)}")
            session_pool.append(session)
        return session_pool

    def _get_proxy(self):
        """Return a proxy that answered a probe request, or None.

        Proxies that fail the probe are removed from ``self.proxy_pool``.
        Iterates instead of recursing so a large pool of dead proxies cannot
        exhaust the call stack.
        """
        while self.proxy_pool:
            proxy = random.choice(self.proxy_pool)
            try:
                requests.get("https://www.1688.com", proxies={"https": proxy}, timeout=5)
            except requests.RequestException:
                try:
                    self.proxy_pool.remove(proxy)
                except ValueError:
                    # Already removed (e.g. by another worker thread).
                    pass
                print(f"移除无效代理: {proxy}")
            else:
                return proxy
        return None

    def _dynamic_sleep(self, interface_type):
        """Sleep for a randomized interval chosen by interface type.

        Args:
            interface_type: One of "basic", "price", "supplier", "spec";
                any other value sleeps for the fixed default interval.
        """
        bounds = self._INTERVAL_RANGES.get(interface_type)
        if bounds is None:
            sleep_time = self._DEFAULT_INTERVAL
        else:
            sleep_time = random.uniform(*bounds)
        print(f"接口请求间隔: {sleep_time:.1f}秒")
        time.sleep(sleep_time)

    def send_request(self, params, interface_type):
        """Send one interface request.

        Args:
            params: Query parameters for the call.
            interface_type: Label used for the delay lookup and log output.

        Returns:
            The decoded JSON body, or None when the response looks blocked,
            the request fails, or the body is not valid JSON.
        """
        self._dynamic_sleep(interface_type)
        proxy = self._get_proxy()
        proxies = {"https": proxy} if proxy else None
        session = random.choice(self.session_pool)
        try:
            response = session.get(
                self.api_domain,
                params=params,
                proxies=proxies,
                timeout=20
            )
            if self._is_blocked(response.text):
                print(f"接口{interface_type}被拦截,更换会话与代理")
                # Discard every pooled session and start from fresh cookies.
                self.session_pool = self._init_session_pool()
                return None
            return response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that cannot be decoded as JSON.
            print(f"接口{interface_type}请求异常: {str(e)}")
            return None

    def send_batch_requests(self, params_list):
        """Send several interface requests on a thread pool.

        NOTE(review): workers sleep concurrently, so requests from different
        workers can fire close together — confirm this matches the intended
        per-IP interval.

        Args:
            params_list: Mapping of interface type -> parameter dict.

        Returns:
            Dict of interface type -> decoded JSON, or None for a failed call.
        """
        results = {}
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            future_tasks = {
                executor.submit(self.send_request, params, interface_type): interface_type
                for interface_type, params in params_list.items()
            }
            for future in as_completed(future_tasks):
                interface_type = future_tasks[future]
                try:
                    results[interface_type] = future.result()
                    print(f"接口{interface_type}请求完成")
                except Exception as e:
                    # Boundary of the worker task: record the failure and
                    # keep collecting the remaining results.
                    results[interface_type] = None
                    print(f"接口{interface_type}任务异常: {str(e)}")
        return results

    def _is_blocked(self, response_text):
        """Return True when the body contains a known block marker.

        Args:
            response_text: Raw response body.
        """
        return any(indicator in response_text for indicator in self._BLOCKED_INDICATORS)
4. 多源数据融合解析器(B 端数据特色处理)
1688 数据分散在多个接口,需融合解析并处理 B 端特色字段(如起订量、批发价区间、供应商资质等):
python
运行
import json
from datetime import datetime
class AlibabaDataMerger:
    """Turn the raw JSON of each 1688 interface into plain result dicts.

    Every ``parse_*`` method returns None unless the payload is non-empty
    and its ``errorCode`` equals 0.
    """

    def __init__(self):
        pass

    @staticmethod
    def _extract_result(payload):
        """Return the payload's ``result`` dict, or None for a failed call.

        A successful payload whose ``result`` is missing or null yields an
        empty dict so callers fall back to their field defaults.
        """
        if not payload or payload.get("errorCode") != 0:
            return None
        return payload.get("result") or {}

    @staticmethod
    def _to_float(value, default=0.0):
        """Convert ``value`` to float, returning ``default`` if it cannot be."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _to_int(value, default=0):
        """Convert ``value`` to int (accepting e.g. "10.0"), else ``default``."""
        try:
            return int(value)
        except (TypeError, ValueError):
            pass
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return default

    def parse_basic_data(self, basic_json):
        """Parse the basic-info interface response.

        Args:
            basic_json: Decoded JSON payload, or None.

        Returns:
            Dict with offer_id, title, main_image, detail_url, category and
            supplier_id, or None for a failed/empty payload.
        """
        data = self._extract_result(basic_json)
        if data is None:
            return None
        result = {}
        result["offer_id"] = data.get("offerId", "")
        # `or ""` guards against an explicit null title.
        result["title"] = (data.get("title") or "").strip()
        result["main_image"] = data.get("picUrl", "")
        result["detail_url"] = data.get("detailUrl", "")
        result["category"] = {
            "id": data.get("cateId", ""),
            "name": data.get("cateName", ""),
            # NOTE(review): _parse_category_path is not defined in the
            # visible part of this class — confirm it exists further down.
            "full_path": self._parse_category_path(data.get("catePath", ""))
        }
        # Supplier ID, needed later to query the supplier interface.
        result["supplier_id"] = data.get("memberId", "")
        return result

    def parse_price_data(self, price_json):
        """Parse the price / minimum-order-quantity interface response.

        Missing, null or non-numeric numbers fall back to their defaults
        (0 for prices, 1 for the minimum order quantity) instead of raising.

        Args:
            price_json: Decoded JSON payload, or None.

        Returns:
            Dict with price_range, moq, price_type and promotion, or None
            for a failed/empty payload.
        """
        data = self._extract_result(price_json)
        if data is None:
            return None
        price_range = data.get("priceRange") or {}
        has_promo = "promotionPrice" in data
        return {
            "price_range": {
                "min": self._to_float(price_range.get("minPrice", 0)),
                "max": self._to_float(price_range.get("maxPrice", 0)),
                "unit": data.get("unit", "件")
            },
            "moq": self._to_int(data.get("moq", 1), 1),
            "price_type": data.get("priceType", "wholesale"),
            "promotion": {
                "has_promo": has_promo,
                "price": self._to_float(data.get("promotionPrice")) if has_promo else 0
            }
        }

    def parse_supplier_data(self, supplier_json):
        """Parse the supplier-info interface response.

        Args:
            supplier_json: Decoded JSON payload, or None.

        Returns:
            Dict with id, company_name, main_product (list split on ";"),
            credit_level, establishment_year and is_verified, or None for a
            failed/empty payload.
        """
        data = self._extract_result(supplier_json)
        if data is None:
            return None
        main_product = data.get("mainProduct")
        return {
            "id": data.get("memberId", ""),
            "company_name": data.get("companyName", ""),
            "main_product": main_product.split(";") if main_product else [],
            "credit_level": data.get("creditLevel", "未评级"),
            "establishment_year": data.get("startYear", "未知"),
            # True whenever the key is present, whatever its value.
            "is_verified": "verified" in data
        }
def parse_spec_data(self, spec_json):
"""解析规格参数与多SKU数据"""
if not spec_json or spec_json.get("errorCode") != 0:
return None
data = spec_json.get("result", {})
spec_groups = []
# 解析规格组(如颜色、尺寸)
for spec in data.get("specList", []):
spec_groups.append({
"spec_id": spec.get("specId", ""),
"spec_name": spec.get("specName", ""),
"values": [v.get("specValueName", "") for v in spec.get("specValueList", [])]
})
#
审核编辑 黄宇
-
接口
+关注
关注
33文章
9158浏览量
154598 -
API
+关注
关注
2文章
1806浏览量
64849
发布评论请先 登录
淘宝/天猫:通过商品详情API实现多店铺商品信息批量同步,确保价格、库存实时更新

利用小红书电商 API 接口,实现小红书店铺商品推荐个性化

eBay 商品详情 API 深度解析:从基础信息到变体数据获取全方案

淘宝 API 接口:海量商品数据挖掘的宝藏钥匙

电商 API 接口:多平台商品评论分析的利器

评论