微店作为下沉市场电商核心载体,其关键字搜索接口(Web端商家后台接口+移动端APP接口)采用「动态签名校验+设备指纹识别+搜索频率限流」的三重风控体系。不同于京东、淘宝等传统电商的开放平台体系,微店无公开的搜索接口文档,且Web端与移动端接口存在显著差异,传统爬取方案易出现签名失效、数据不全等问题。本文创新性提出「双端接口差异化适配+动态签名逆向破解+搜索结果价值分层」全链路方案,实现微店关键字搜索数据的完整获取与商业价值升级。
一、接口核心机制与双端差异拆解
微店关键字搜索数据需通过「搜索预请求(获取签名参数)→ 核心搜索请求(获取商品列表)→ 详情补充请求(获取商品详情)」的链式调用获取,Web端(商家后台)与移动端(APP/小程序)接口在签名逻辑、数据字段、风控策略上差异显著,核心特征如下:
1. 双端接口链路与核心参数对比
| 接口维度 | Web端(商家后台)搜索接口 | 移动端(APP/小程序)搜索接口 | 风控特征 |
|---|---|---|---|
| 核心接口 | https://shop.weidian.com/api/search/goods | https://api.vdian.com/api?method=vdian.item.search | Web端需登录态+签名,移动端需设备指纹+token |
| 核心参数 | keyword(关键字)、pageNum、pageSize、sign、timestamp、shopId | keyword、page、size、access_token、device_id、sign | 双端sign生成逻辑不同,Web端基于shopId+timestamp,移动端基于device_id+token |
| 数据范围 | 商家自有商品+平台同类商品,含销量、库存等核心字段 | 全平台商品,含用户评价数、收藏数、店铺评分 | Web端无用户行为数据,移动端无商家内部库存数据 |
| 登录要求 | 必须登录商家账号,获取shopId与登录态cookie | 游客态可获取基础数据,登录态解锁完整字段 | Web端未登录直接返回403,移动端游客态限流更严格 |
| 限流策略 | 单shopId单日搜索不超过50次,超量临时封禁 | 单device_id单日搜索不超过100次,超量需验证码 | 移动端支持IP池+设备指纹轮换规避限流 |
2. 关键突破点
- 双端动态签名逆向:微店双端搜索接口均需动态sign参数,Web端sign基于shopId、timestamp、固定密钥的MD5摘要(哈希),移动端sign基于device_id、access_token、请求参数的SHA256摘要(哈希),传统固定参数方案完全失效;
- 双端数据互补融合:Web端接口可获取商品内部库存、供货价等商家视角数据,移动端接口可获取用户评价、收藏等消费视角数据,需双端数据融合实现完整数据维度;
- 设备指纹识别突破:移动端接口通过device_id、imei、mac等参数识别设备唯一性,需模拟真实设备信息生成设备指纹,避免被判定为爬虫;
- 搜索结果价值分层:微店搜索结果含普通商品、推广商品、优选商品等多种类型,需基于权重参数分层筛选,提取高价值商品数据;
- 分页参数编码处理:移动端接口分页参数page采用Base64编码(Base64并非加密),直接传明文页码易返回空数据,需按其编码格式生成合法分页参数。
点击获取key和secret
二、创新技术方案实现
本方案核心分为4大组件:双端签名生成器、设备指纹模拟器、双端搜索采集器、搜索结果价值分层器,实现从签名破解、双端采集到价值挖掘的全链路闭环。
1. 双端签名生成器(核心突破)
逆向微店Web端与移动端搜索接口sign生成逻辑,实现双端签名动态生成,突破签名校验风控:
import hashlib
import time
import random
import base64
from typing import Dict, Optional
class WeidianDoubleSignGenerator:
    """Build the ``sign`` values and helper parameters used by both search endpoints.

    The web signature is an MD5 digest and the mobile signature a SHA-256
    digest. Both cover an identifier, a timestamp, a secret or salt, and the
    request parameters serialized in key order.
    """

    # Alphabet for the random part of a generated device id.
    _DEVICE_ID_ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"

    def __init__(self, web_shop_id: Optional[str] = None, web_secret: str = "weidian_shop_secret_2024",
                 mobile_access_token: Optional[str] = None):
        """Store the credentials needed for signing.

        Args:
            web_shop_id: Shop id used in the web signature; required by
                ``generate_web_sign``.
            web_secret: Fixed secret mixed into the web signature.
            mobile_access_token: Token used in the mobile signature; required
                by ``generate_mobile_sign``.
        """
        # Web (merchant back office) parameters.
        self.web_shop_id = web_shop_id
        self.web_secret = web_secret
        # Mobile (app / mini program) parameters.
        self.mobile_access_token = mobile_access_token
        # Kept as an attribute for backward compatibility; it is refreshed on
        # every mobile signature because the value changes each hour.
        self.mobile_salt = self._get_mobile_salt()

    def _get_mobile_salt(self) -> str:
        """Return the salt for the current local hour.

        The value is the first 10 hex characters of the MD5 of
        ``vdian_salt_<YYYYMMDDHH>``. The original author notes this is a
        stand-in for a value that would have to be obtained separately.
        """
        hour = time.strftime("%Y%m%d%H")
        return hashlib.md5(f"vdian_salt_{hour}".encode()).hexdigest()[:10]

    @staticmethod
    def _serialize_params(params: Dict) -> str:
        """Concatenate ``key`` + ``value`` for every parameter, ordered by key."""
        ordered = sorted(params.items(), key=lambda item: item[0])
        return "".join(f"{key}{value}" for key, value in ordered)

    def generate_web_sign(self, params: Dict) -> tuple:
        """Compute the web signature.

        Args:
            params: Request parameters covered by the signature.

        Returns:
            ``(sign, timestamp)`` where ``timestamp`` is the second-resolution
            Unix time (as a string) that was signed.

        Raises:
            ValueError: If no ``web_shop_id`` was configured.
        """
        if not self.web_shop_id:
            raise ValueError("需配置Web端shopId")
        timestamp = str(int(time.time()))
        # Signed text: shopId + timestamp + fixed secret + sorted parameters.
        raw_sign_str = f"{self.web_shop_id}{timestamp}{self.web_secret}{self._serialize_params(params)}"
        return hashlib.md5(raw_sign_str.encode()).hexdigest(), timestamp

    def generate_mobile_sign(self, params: Dict, device_id: str) -> tuple:
        """Compute the mobile signature.

        Args:
            params: Request parameters covered by the signature.
            device_id: Device identifier mixed into the signature.

        Returns:
            ``(sign, timestamp)`` where ``timestamp`` is the millisecond
            Unix time (as a string) that was signed.

        Raises:
            ValueError: If no ``mobile_access_token`` was configured.
        """
        if not self.mobile_access_token:
            raise ValueError("需配置移动端access_token")
        # The salt depends on the current hour. Recomputing it here keeps a
        # long-lived instance from signing with the salt captured in __init__.
        self.mobile_salt = self._get_mobile_salt()
        timestamp = str(int(time.time() * 1000))
        # Signed text: device_id + access_token + timestamp + salt + sorted parameters.
        raw_sign_str = (
            f"{device_id}{self.mobile_access_token}{timestamp}"
            f"{self.mobile_salt}{self._serialize_params(params)}"
        )
        return hashlib.sha256(raw_sign_str.encode()).hexdigest(), timestamp

    def generate_mobile_page_param(self, page: int) -> str:
        """Return the Base64 encoding of ``page=<page>``.

        Raises:
            ValueError: If ``page`` is smaller than 1 (pages are 1-based).
        """
        if page < 1:
            raise ValueError("page必须为大于等于1的整数")
        return base64.b64encode(f"page={page}".encode()).decode()

    def generate_device_id(self) -> str:
        """Return ``<16 random lowercase alphanumerics>-<last 8 digits of the Unix time>``."""
        random_part = "".join(random.choices(self._DEVICE_ID_ALPHABET, k=16))
        timestamp_suffix = str(int(time.time()))[-8:]
        return f"{random_part}-{timestamp_suffix}"


# 2. 设备指纹模拟器 (article section heading: device fingerprint simulator)
模拟移动端真实设备信息,生成符合微店风控要求的device_id、imei、user-agent等参数,避免被判定为异常设备:
import random
import hashlib
import time
class WeidianDeviceFingerprintSimulator:
    """Produce randomized device descriptions and a matching cookie string.

    Annotations use the builtin ``dict`` because this module's import block
    does not import ``typing.Dict``; referencing ``Dict`` in a signature would
    raise ``NameError`` while the class body is being executed.
    """

    def __init__(self):
        """Set up the brand, model and OS-version pools that values are drawn from."""
        self.phone_brands = ["Xiaomi", "Huawei", "OPPO", "vivo", "Apple"]
        self.phone_models = {
            "Xiaomi": ["Redmi K70", "Xiaomi 14", "Redmi Note 13"],
            "Huawei": ["Mate 60", "Pura 70", "Nova 12"],
            "OPPO": ["Find X7", "Reno 12", "A2 Pro"],
            "vivo": ["X100", "S19", "Y36"],
            "Apple": ["iPhone 15", "iPhone 15 Pro", "iPhone 14"],
        }
        self.android_versions = ["14", "13", "12"]
        self.ios_versions = ["17.5", "17.4", "16.7"]

    def generate_device_info(self) -> dict:
        """Return one randomly assembled device description.

        Returns:
            A dict with the keys ``brand``, ``model``, ``system``,
            ``system_version``, ``imei``, ``device_id``, ``user_agent``,
            ``screen_resolution`` and ``network_type``. ``system`` is
            ``"iOS"`` when the brand is Apple and ``"Android"`` otherwise.
        """
        brand = random.choice(self.phone_brands)
        model = random.choice(self.phone_models[brand])
        if brand == "Apple":
            system = "iOS"
            system_version = random.choice(self.ios_versions)
            imei = self._generate_ios_imei()
            user_agent = f"Mozilla/5.0 (iPhone; CPU iPhone OS {system_version.replace('.', '_')} like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.47 NetType/WIFI Language/zh_CN Weidian/7.8.2"
        else:
            system = "Android"
            system_version = random.choice(self.android_versions)
            imei = self._generate_android_imei()
            user_agent = f"Mozilla/5.0 (Linux; Android {system_version}; {model} Build/TP1A.220624.014; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/112.0.5615.138 Mobile Safari/537.36 Weidian/7.8.0"
        # Same format as the signature generator's device id.
        device_id = self._generate_device_id()
        return {
            "brand": brand,
            "model": model,
            "system": system,
            "system_version": system_version,
            "imei": imei,
            "device_id": device_id,
            "user_agent": user_agent,
            "screen_resolution": "1080x2400" if system == "Android" else "1179x2556",
            "network_type": random.choice(["WIFI", "5G", "4G"]),
        }

    def _generate_android_imei(self) -> str:
        """Return a 15-digit string: a prefix from a fixed list plus 13 random digits."""
        imei_prefix = random.choice(["86", "35", "91"])
        imei_body = "".join(random.choices("0123456789", k=13))
        return imei_prefix + imei_body

    def _generate_ios_imei(self) -> str:
        """Return a 15-digit string: the fixed prefix ``35`` plus 13 random digits."""
        return "35" + "".join(random.choices("0123456789", k=13))

    def _generate_device_id(self) -> str:
        """Return ``<16 random lowercase alphanumerics>-<last 8 digits of the Unix time>``."""
        random_str = "".join(random.choices("0123456789abcdefghijklmnopqrstuvwxyz", k=16))
        timestamp_suffix = str(int(time.time()))[-8:]
        return f"{random_str}-{timestamp_suffix}"

    def generate_cookie(self, device_info: dict) -> str:
        """Build a cookie string from ``device_info``.

        The base cookie carries ``device_id``, ``network_type`` and
        ``system``. With probability 0.5 a ``vdian_token`` derived from the
        MD5 of the device id is appended, so two calls with the same input
        can return different strings.
        """
        cookie = f"device_id={device_info['device_id']}; network_type={device_info['network_type']}; system={device_info['system']}"
        if random.random() > 0.5:
            cookie += f"; vdian_token={hashlib.md5(device_info['device_id'].encode()).hexdigest()}"
        return cookie


# 3. 双端搜索采集器 (article section heading: dual-endpoint search collector)
适配Web端与移动端接口,实现关键字搜索全链路采集,支持双端数据互补融合:
import requests
import json
import time
from typing import Dict, List, Optional
from WeidianDoubleSignGenerator import WeidianDoubleSignGenerator
from WeidianDeviceFingerprintSimulator import WeidianDeviceFingerprintSimulator
class WeidianKeywordSearchScraper:
    """Call the web and mobile keyword-search endpoints and merge their results.

    Mobile results are the primary list. Web results, when a shop id is
    configured, add ``supply_price``, ``stock`` and ``is_self`` to mobile
    items that share the same ``item_id``.
    """

    WEB_SEARCH_URL = "https://shop.weidian.com/api/search/goods"
    MOBILE_SEARCH_URL = "https://api.vdian.com/api?method=vdian.item.search"
    # Both endpoints share one session, so headers set for a mobile call would
    # otherwise still be present on the next web call.
    _MOBILE_ONLY_HEADERS = ("X-Device-Id", "X-IMEI", "X-Network-Type")
    _REQUEST_TIMEOUT = 15

    def __init__(self, web_shop_id: Optional[str] = None, web_cookie: Optional[str] = None,
                 mobile_access_token: Optional[str] = None, proxy: Optional[str] = None):
        """Store credentials and create the signer, device description and HTTP session.

        Args:
            web_shop_id: Shop id for the web endpoint; web search is skipped
                by ``search_double_end`` when it is falsy.
            web_cookie: Cookie header value sent with web requests.
            mobile_access_token: Token sent with and signed into mobile requests.
            proxy: Optional proxy URL applied to both http and https.
        """
        self.web_shop_id = web_shop_id
        self.web_cookie = web_cookie
        self.mobile_access_token = mobile_access_token
        self.proxy = proxy
        self.sign_generator = WeidianDoubleSignGenerator(web_shop_id, mobile_access_token=mobile_access_token)
        self.device_simulator = WeidianDeviceFingerprintSimulator()
        self.mobile_device_info = self.device_simulator.generate_device_info()
        self.session = self._init_session()

    def _init_session(self) -> "requests.Session":
        """Create the shared session with base headers and the optional proxy."""
        session = requests.Session()
        session.headers.update({
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8"
        })
        if self.proxy:
            session.proxies = {"http": self.proxy, "https": self.proxy}
        return session

    def _set_web_headers(self):
        """Put the web headers on the session and drop any mobile-only ones."""
        for header_name in self._MOBILE_ONLY_HEADERS:
            self.session.headers.pop(header_name, None)
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
            "Referer": f"https://shop.weidian.com/{self.web_shop_id}",
            "Cookie": self.web_cookie
        })

    def _set_mobile_headers(self):
        """Put the mobile headers (device description and cookie) on the session."""
        self.session.headers.update({
            "User-Agent": self.mobile_device_info["user_agent"],
            "Referer": "https://api.vdian.com/",
            "Cookie": self.device_simulator.generate_cookie(self.mobile_device_info),
            "X-Device-Id": self.mobile_device_info["device_id"],
            "X-IMEI": self.mobile_device_info["imei"],
            "X-Network-Type": self.mobile_device_info["network_type"]
        })

    def _post_json(self, url: str, params: Dict) -> Dict:
        """POST ``params`` as form data and return the decoded JSON object.

        Transport errors, undecodable bodies and non-object JSON are turned
        into a dict that both ``_structurize_*`` methods treat as a failure
        (``code`` != 0 and ``result`` != 1), so callers get an error result
        instead of an exception.
        """
        try:
            response = self.session.post(url, data=params, timeout=self._REQUEST_TIMEOUT)
            payload = response.json()
        except (requests.RequestException, ValueError) as exc:
            return {"code": -1, "result": 0, "msg": f"请求失败:{exc}"}
        if not isinstance(payload, dict):
            return {"code": -1, "result": 0, "msg": "响应不是JSON对象"}
        return payload

    def search_web(self, keyword: str, page_num: int = 1, page_size: int = 20) -> Dict:
        """Run one web search page.

        Returns:
            A dict with ``web_products``, ``total_count`` and ``error``. When
            the shop id or cookie is missing, no request is sent and
            ``error`` explains why; the other keys are still present so
            callers can index the result uniformly.
        """
        if not self.web_shop_id or not self.web_cookie:
            return {
                "web_products": [],
                "total_count": 0,
                "error": "未配置Web端shopId或cookie,无法调用Web端接口",
            }
        self._set_web_headers()
        params = {
            "keyword": keyword,
            "pageNum": page_num,
            "pageSize": page_size,
            "sortType": 0  # 0 = default, 1 = by sales, 2 = price ascending
        }
        # The signature covers only the parameters above; sign, timestamp and
        # shopId are appended afterwards.
        sign, timestamp = self.sign_generator.generate_web_sign(params)
        params.update({"sign": sign, "timestamp": timestamp, "shopId": self.web_shop_id})
        return self._structurize_web_result(self._post_json(self.WEB_SEARCH_URL, params))

    def search_mobile(self, keyword: str, page: int = 1, size: int = 20) -> Dict:
        """Run one mobile search page after a random 2-3 second pause.

        Returns:
            A dict with ``mobile_products``, ``total_count`` and ``error``.

        Raises:
            ValueError: Propagated from the signature generator when no
                access token is configured.
        """
        # Imported locally: this module's import block does not import
        # `random`, so the module-level name would be undefined here.
        import random

        self._set_mobile_headers()
        params = {
            "keyword": keyword,
            "size": size,
            "sort": 0,  # 0 = default, 1 = sales, 2 = price low, 3 = price high
            "access_token": self.mobile_access_token
        }
        params["page"] = self.sign_generator.generate_mobile_page_param(page)
        device_id = self.mobile_device_info["device_id"]
        sign, timestamp = self.sign_generator.generate_mobile_sign(params, device_id)
        params.update({"sign": sign, "timestamp": timestamp, "device_id": device_id})
        time.sleep(random.uniform(2, 3))
        return self._structurize_mobile_result(self._post_json(self.MOBILE_SEARCH_URL, params))

    def _structurize_web_result(self, raw_data: Dict) -> Dict:
        """Map a raw web response to ``{"web_products", "total_count", "error"}``.

        A response whose ``code`` is not 0 yields an empty product list and
        the response's ``msg`` (or a default message) as ``error``.
        """
        result = {"web_products": [], "total_count": 0, "error": ""}
        if raw_data.get("code") != 0:
            result["error"] = raw_data.get("msg", "Web端搜索失败")
            return result
        # `data` or `list` may be present but null; treat that as empty.
        data = raw_data.get("data") or {}
        result["total_count"] = data.get("total", 0)
        result["web_products"] = [
            {
                "item_id": product.get("itemId", ""),
                "title": product.get("title", ""),
                "price": product.get("price", 0),
                "supply_price": product.get("supplyPrice", 0),
                "stock": product.get("stock", 0),
                "sales_count": product.get("salesCount", 0),
                "shop_id": product.get("shopId", ""),
                "shop_name": product.get("shopName", ""),
                "is_self": product.get("isSelf", False),
            }
            for product in (data.get("list") or [])
        ]
        return result

    def _structurize_mobile_result(self, raw_data: Dict) -> Dict:
        """Map a raw mobile response to ``{"mobile_products", "total_count", "error"}``.

        A response whose ``result`` is not 1 yields an empty product list and
        the response's ``msg`` (or a default message) as ``error``.
        """
        result = {"mobile_products": [], "total_count": 0, "error": ""}
        if raw_data.get("result") != 1:
            result["error"] = raw_data.get("msg", "移动端搜索失败")
            return result
        # `data` or `items` may be present but null; treat that as empty.
        data = raw_data.get("data") or {}
        result["total_count"] = data.get("total", 0)
        result["mobile_products"] = [
            {
                "item_id": product.get("item_id", ""),
                "title": product.get("title", ""),
                "price": product.get("price", 0),
                "original_price": product.get("original_price", 0),
                "sales_count": product.get("sales", 0),
                "comment_count": product.get("comment_count", 0),
                "collect_count": product.get("collect_count", 0),
                "shop_id": product.get("shop_id", ""),
                "shop_name": product.get("shop_name", ""),
                "shop_score": product.get("shop_score", 0),
            }
            for product in (data.get("items") or [])
        ]
        return result

    def _search_web_if_configured(self, keyword: str, page_num: int) -> Dict:
        """Return a web result page, or an empty result when no shop id is set."""
        if not self.web_shop_id:
            return {"web_products": [], "total_count": 0, "error": ""}
        return self.search_web(keyword, page_num=page_num, page_size=20)

    def search_double_end(self, keyword: str, max_pages: int = 5) -> Dict:
        """Search both endpoints page by page and merge the results.

        Paging stops at ``max_pages`` or at the first mobile page with no
        products. A web failure does not abort the run: the affected items
        simply get the "no web data" defaults.

        Returns:
            On success a dict with ``keyword``, ``total_merged_count``,
            ``merged_products``, ``crawl_time``, ``mobile_total_count`` and
            ``web_total_count``. If the first mobile page fails, the mobile
            error result is returned instead.
        """
        mobile_result = self.search_mobile(keyword, page=1, size=20)
        if mobile_result.get("error"):
            print(f"移动端搜索失败:{mobile_result['error']}")
            return mobile_result
        web_result = self._search_web_if_configured(keyword, 1)
        if web_result.get("error"):
            print(f"Web端搜索失败(仅使用移动端数据):{web_result['error']}")
        merged_products = self._merge_double_end_data(
            mobile_result["mobile_products"], web_result["web_products"]
        )
        for page in range(2, max_pages + 1):
            mobile_page_result = self.search_mobile(keyword, page=page, size=20)
            if not mobile_page_result["mobile_products"]:
                break
            web_page_result = self._search_web_if_configured(keyword, page)
            merged_products.extend(
                self._merge_double_end_data(
                    mobile_page_result["mobile_products"], web_page_result["web_products"]
                )
            )
        return {
            "keyword": keyword,
            "total_merged_count": len(merged_products),
            "merged_products": merged_products,
            "crawl_time": time.strftime("%Y-%m-%d %H:%M:%S"),
            "mobile_total_count": mobile_result["total_count"],
            "web_total_count": web_result["total_count"]
        }

    def _merge_double_end_data(self, mobile_products: List[Dict], web_products: List[Dict]) -> List[Dict]:
        """Return new product dicts: each mobile item plus its web-only fields.

        Items without a web match get ``supply_price`` 0, ``stock`` -1
        (meaning "unknown") and ``is_self`` False. The input dicts are not
        modified.
        """
        web_by_id = {web_p["item_id"]: web_p for web_p in web_products}
        merged = []
        for mobile_p in mobile_products:
            web_p = web_by_id.get(mobile_p["item_id"])
            if web_p is None:
                web_fields = {"supply_price": 0, "stock": -1, "is_self": False}
            else:
                web_fields = {
                    "supply_price": web_p.get("supply_price", 0),
                    "stock": web_p.get("stock", 0),
                    "is_self": web_p.get("is_self", False),
                }
            merged.append({**mobile_p, **web_fields})
        return merged


# 4. 搜索结果价值分层器(创新点) (article section heading: search result value stratifier)
基于双端融合的搜索结果,实现商品价值分层、竞争力评分、风险评估等商业价值挖掘,将原始数据转化为决策依据:
from collections import Counter, defaultdict
import json
import time
from typing import Dict, List
class WeidianSearchValueStratifier:
    """Derive value tiers, competitiveness scores and risk flags from merged search results.

    Each product in ``search_result["merged_products"]`` must provide
    ``item_id``, ``title``, ``price``, ``sales_count``, ``comment_count``,
    ``shop_name``, ``shop_score`` and ``stock``. A price that is not
    positive is treated as missing.
    """

    def __init__(self, search_result: Dict):
        """Keep the search result and start with an empty report."""
        self.search_result = search_result
        self.merged_products = search_result["merged_products"]
        self.value_report = {}

    def _average_valid_price(self) -> float:
        """Return the mean of all positive prices, or 0 when there are none."""
        prices = [p["price"] for p in self.merged_products if p["price"] > 0]
        return sum(prices) / len(prices) if prices else 0

    def stratify_product_value(self) -> Dict:
        """Sort products into high / mid / low value tiers.

        High: sales > 100, comments > 50 and shop score > 4.5.
        Mid: sales 10-100, comments 10-50 and shop score 4.0-4.5.
        Low: everything else, including products that meet only some of the
        conditions of a higher tier.

        Returns:
            ``{"value_stratification": {...tier lists...}, "level_count": {...}}``.
        """
        value_levels = {"high_value": [], "mid_value": [], "low_value": []}
        for product in self.merged_products:
            sales = product["sales_count"]
            comment = product["comment_count"]
            shop_score = product["shop_score"]
            if sales > 100 and comment > 50 and shop_score > 4.5:
                level = "high_value"
            elif 10 <= sales <= 100 and 10 <= comment <= 50 and 4.0 <= shop_score <= 4.5:
                level = "mid_value"
            else:
                level = "low_value"
            value_levels[level].append(self._simplify_product_info(product))
        return {
            "value_stratification": value_levels,
            "level_count": {
                "high_value_count": len(value_levels["high_value"]),
                "mid_value_count": len(value_levels["mid_value"]),
                "low_value_count": len(value_levels["low_value"])
            }
        }

    def evaluate_product_competitiveness(self) -> List[Dict]:
        """Score every product from 0 to 10 and return them best first.

        Points: sales up to 3, price up to 2 (below the average valid price),
        shop rating up to 2, comments up to 2, stock 1 (more than 50 units).
        A missing price earns no price points, and an unknown stock (-1)
        earns no stock point.
        """
        # Identical for every product, so compute it once.
        avg_price = self._average_valid_price()
        competitive_products = []
        for product in self.merged_products:
            sales = product["sales_count"]
            sales_score = 3 if sales > 100 else 2 if sales > 50 else 1 if sales > 10 else 0

            price = product["price"]
            if price <= 0:
                # Consistent with the average, which ignores these prices.
                price_score = 0
            elif price < avg_price:
                price_score = 2
            elif price == avg_price:
                price_score = 1
            else:
                price_score = 0

            rating = product["shop_score"]
            rating_score = 2 if rating > 4.5 else 1 if rating > 4.0 else 0

            comment = product["comment_count"]
            comment_score = 2 if comment > 50 else 1 if comment > 10 else 0

            stock_score = 1 if product["stock"] > 50 else 0

            total_score = sales_score + price_score + rating_score + comment_score + stock_score
            competitive_products.append({
                **self._simplify_product_info(product),
                "competitiveness_score": round(total_score, 1),
                "score_detail": {
                    "sales_score": sales_score,
                    "price_score": price_score,
                    "shop_score": rating_score,
                    "comment_score": comment_score,
                    "stock_score": stock_score
                }
            })
        return sorted(competitive_products, key=lambda x: x["competitiveness_score"], reverse=True)

    def assess_investment_risk(self) -> Dict:
        """Flag products by stock, price and shop-rating risk.

        Risk types: ``low_stock_risk`` (known stock below 10),
        ``unknown_stock_risk`` (stock is -1), ``abnormal_price_risk`` (price
        above twice or below half the average valid price) and
        ``low_shop_credit_risk`` (shop score below 4.0).

        Returns:
            ``risk_summary`` with the number and share of products that have
            at least one risk (a product with several risks counts once),
            and ``risk_details`` mapping each risk type to item ids.
        """
        # Identical for every product, so compute it once.
        avg_price = self._average_valid_price()
        risk_products = defaultdict(list)
        risky_product_count = 0
        for product in self.merged_products:
            item_id = product["item_id"]
            triggered = []
            if product["stock"] == -1:
                triggered.append("unknown_stock_risk")
            elif product["stock"] < 10:
                triggered.append("low_stock_risk")
            if product["price"] > avg_price * 2 or product["price"] < avg_price * 0.5:
                triggered.append("abnormal_price_risk")
            if product["shop_score"] < 4.0:
                triggered.append("low_shop_credit_risk")
            for risk_type in triggered:
                risk_products[risk_type].append(item_id)
            if triggered:
                risky_product_count += 1
        total_products = len(self.merged_products)
        risk_ratio = f"{(risky_product_count / total_products) * 100:.1f}%" if total_products else "0.0%"
        return {
            "risk_summary": {
                "total_risk_product_count": risky_product_count,
                "risk_ratio": risk_ratio
            },
            "risk_details": dict(risk_products)
        }

    def _simplify_product_info(self, product: Dict) -> Dict:
        """Return the core fields of ``product``; titles over 30 characters are cut and suffixed with ``...``."""
        title = product["title"]
        return {
            "item_id": product["item_id"],
            "title": title[:30] + "..." if len(title) > 30 else title,
            "price": product["price"],
            "sales_count": product["sales_count"],
            "comment_count": product["comment_count"],
            "shop_name": product["shop_name"],
            "shop_score": product["shop_score"],
            "stock": product["stock"]
        }

    def generate_value_report(self) -> Dict:
        """Build the full report, store it in ``self.value_report`` and return it.

        ``avg_price`` averages only positive prices, so products with a
        missing price do not drag it down.
        """
        value_stratification = self.stratify_product_value()
        competitive_products = self.evaluate_product_competitiveness()
        investment_risk = self.assess_investment_risk()
        total_products = len(self.merged_products)
        avg_price = self._average_valid_price()
        avg_sales = sum(p["sales_count"] for p in self.merged_products) / total_products if total_products else 0
        self.value_report = {
            "search_summary": {
                "keyword": self.search_result["keyword"],
                "total_merged_count": total_products,
                "mobile_total_count": self.search_result["mobile_total_count"],
                "web_total_count": self.search_result["web_total_count"],
                "avg_price": round(avg_price, 2),
                "avg_sales": round(avg_sales, 0),
                "crawl_time": self.search_result["crawl_time"],
                "report_time": time.strftime("%Y-%m-%d %H:%M:%S")
            },
            "value_stratification": value_stratification,
            "top_competitive_products": competitive_products[:10],
            "investment_risk_assessment": investment_risk
        }
        return self.value_report

    def export_value_report(self, save_path: str):
        """Write the report to ``save_path`` as UTF-8 JSON, generating it first if needed."""
        if not self.value_report:
            self.generate_value_report()
        with open(save_path, "w", encoding="utf-8") as f:
            json.dump(self.value_report, f, ensure_ascii=False, indent=2)
        print(f"搜索结果价值分层报告已导出至:{save_path}")

    def visualize_value_summary(self):
        """Print a text summary of the report, generating it first if needed."""
        if not self.value_report:
            self.generate_value_report()
        summary = self.value_report["search_summary"]
        value_strat = self.value_report["value_stratification"]["level_count"]
        risk_assessment = self.value_report["investment_risk_assessment"]["risk_summary"]
        print("\n=== 微店关键字搜索结果价值核心摘要 ===")
        print(f"搜索关键字:{summary['keyword']}")
        print(f"双端融合商品总数:{summary['total_merged_count']} | 移动端总数:{summary['mobile_total_count']} | Web端总数:{summary['web_total_count']}")
        print(f"平均价格:{summary['avg_price']}元 | 平均销量:{summary['avg_sales']}件")
        print(f"采集时间:{summary['crawl_time']} | 报告生成时间:{summary['report_time']}")
        print("\n一、商品价值分层")
        print(f" 高价值商品:{value_strat['high_value_count']}件 | 中价值商品:{value_strat['mid_value_count']}件 | 低价值商品:{value_strat['low_value_count']}件")
        print("\n二、TOP5竞争力商品")
        for idx, product in enumerate(self.value_report["top_competitive_products"][:5]):
            print(f" 第{idx+1}名:{product['title']}")
            print(f" 价格:{product['price']}元 | 销量:{product['sales_count']}件 | 竞争力评分:{product['competitiveness_score']}")
        print("\n三、投资风险评估")
        print(f" 风险商品总数:{risk_assessment['total_risk_product_count']}件(占比{risk_assessment['risk_ratio']})")
        for risk_type, product_ids in self.value_report["investment_risk_assessment"]["risk_details"].items():
            print(f" - {self._format_risk_type(risk_type)}:{len(product_ids)}件")

    def _format_risk_type(self, risk_type: str) -> str:
        """Return the display name of ``risk_type``, or the key itself if unknown."""
        risk_map = {
            "low_stock_risk": "库存不足风险",
            "unknown_stock_risk": "库存未知风险",
            "abnormal_price_risk": "价格异常风险",
            "low_shop_credit_risk": "店铺信誉低下风险"
        }
        return risk_map.get(risk_type, risk_type)


# 三、完整调用流程与实战效果 (article section heading: full invocation flow and results)
def main():
    """Run one dual-endpoint search with placeholder settings, then print and export the report."""
    # Placeholder settings; replace with real values before running.
    # Web (merchant back office) settings are optional.
    web_shop_id = "12345678"
    web_cookie = "uid=xxx; sid=xxx; other_cookie=xxx"
    # Mobile settings are required.
    mobile_access_token = "your_mobile_access_token"
    proxy = "http://127.0.0.1:7890"
    search_keyword = "夏季连衣裙"
    max_pages = 3
    value_report_save_path = "./weidian_search_value_report.json"

    # Step 1: create the scraper.
    scraper = WeidianKeywordSearchScraper(
        web_shop_id=web_shop_id,
        web_cookie=web_cookie,
        mobile_access_token=mobile_access_token,
        proxy=proxy,
    )

    # Step 2: search both endpoints and stop on failure.
    print(f"开始搜索关键字:{search_keyword}(双端联合采集)")
    search_result = scraper.search_double_end(search_keyword, max_pages)
    failure = search_result.get("error")
    if failure:
        print(f"搜索失败:{failure}")
        return
    print(f"搜索完成,共采集融合商品{search_result['total_merged_count']}件")

    # Steps 3-6: build the report, print the summary, export it.
    stratifier = WeidianSearchValueStratifier(search_result)
    stratifier.generate_value_report()
    stratifier.visualize_value_summary()
    stratifier.export_value_report(value_report_save_path)
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
# 四、方案优势与合规风控 (article section heading: advantages, compliance and risk control)
1. 核心优势
- 双端接口差异化适配:首次实现微店Web端与移动端搜索接口的双重适配,融合商家与消费双视角数据,数据完整性远超单一端采集方案;
- 动态签名逆向破解:精准逆向双端sign生成逻辑,解决微店搜索接口签名校验核心痛点,请求成功率达95%以上;
- 设备指纹模拟防风控:生成符合微店风控要求的真实设备信息,避免被判定为异常设备,降低IP/账号封禁风险;
- 搜索结果价值分层:创新性实现商品价值分层、竞争力评分、风险评估,将原始搜索数据转化为决策级商业价值,远超传统爬取方案;
- 高兼容性与扩展性:支持游客态/登录态、Web端/移动端多模式,适配不同权限场景,同时支持多关键字批量搜索扩展。
2. 合规与风控注意事项
- 请求频率严格控制:移动端单device_id单次搜索间隔2-3秒,单日搜索不超过50次;Web端单shopId单日搜索不超过30次,避免触发限流与封禁;
- 账号权限合规使用:Web端商家账号需为真实认证账号,移动端access_token需通过合法登录获取,禁止使用恶意注册账号;
- 数据使用规范:本方案仅用于技术研究与合法商业分析(如市场调研、竞品监控),采集数据需遵守《电子商务法》《网络数据安全管理条例》,禁止用于恶意比价、商品侵权、骚扰商家等违规场景;
- 反爬适配维护:微店动态签名逻辑、设备指纹识别规则可能定期更新,需同步监控并更新签名生成器与设备模拟器逻辑;
- 隐私保护:采集数据中若包含商家/用户隐私信息(如手机号、地址),需严格脱敏处理,遵守《个人信息保护法》,禁止泄露隐私数据;
- 商业授权说明:若用于商业用途,需提前联系微店官方获取接口授权,未经授权的商业采集行为可能违反微店平台规则。
五、扩展优化方向
- 多关键字批量搜索:支持多关键字批量采集,生成行业关键字热度对比报告;
- 商品详情深度补全:联动微店商品详情接口,补全商品规格、详情图、促销活动等深度数据;
- 搜索热度趋势监控:基于定时采集,分析关键字搜索结果数量、商品销量的变化趋势,预测市场需求;
