37 KiB
xz_cable_setup_check_tool.py build
pyinstaller --onefile \
--paths=/home/gyou/NexusBench-baihe-br/nexusbench \
--paths=/home/gyou/NexusBench-baihe-br/nexusbench/log \
--paths=/home/gyou/NexusBench-baihe-br/nexusbench/connection \
--paths=/home/gyou/NexusBench-baihe-br/nexusbench/gpu/biren \
--hidden-import=connection.http_helper \
--runtime-tmpdir=/home/gyou/tmp \
/home/gyou/NexusBench-baihe-br/nexusbench/biren_test.py
# 加了这一行就可以运行了
--runtime-tmpdir=/home/gyou/tmp \
--hidden-import=log.logger \
--hidden-import=connection.ssh_connection_manager \
./build/whiteriver_exp --host 10.57.216.109 --exp 2 --cmd vcmd --param rev
./vuart -i 10.57.216.109 -e 2 -c vcmd -p ver
./vuart -i 10.57.216.108 -e 4 -c fw-down -p
./build/whiteriver_exp -i 10.57.216.108 -e 4 -c fw-down -p
./build/whiteriver_exp -i 10.57.216.91 -e 4 -c vcmd -p ver
net use X: \\10.57.216.173\shared /user:root ossdbg1
PicT1!2@3#4$
RCms@Zte3
./build/whiteriver_exp -i 10.57.216.94 -e 4 -c fw-down -p "./whiteriver_exp@1.0.17+2508161844.img"
./build/whiteriver_exp -i 10.57.216.95,10.57.216.96,10.57.216.97,10.57.216.98
scp /usr/bin/ocs_link_reset root@10.57.216.166:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.166:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.165:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.187:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.148:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.163:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.139:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.173:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.167:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.134:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.145:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.176:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.180:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.185:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.150:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.168:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.174:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.132:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.189:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.151:/usr/bin/
scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.156:/usr/bin/
import logging
import re
import time
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import DefaultDict, Dict, List, Optional

from gpu.biren.exp_util import DevWhiteRiverExp
from parser.ibias_rssi_map_parser import IbiasRssiMapParser
from parser.topology_parser import TopoMappingParser
from parser.transceiver_config_parser import TransceiverConfigParser
from toolbox.opt_reg_access_tool import OptRegAccessTool
# Module-level logger. The original read ``getLogger(name)``, which raises
# NameError at import time because the underscores of ``__name__`` were lost.
logger = logging.getLogger(__name__)
# --------------------------------------------------
# Eye Issue Definition (Corrected Logic)
# --------------------------------------------------
class OptReg(Enum):
    """Enumeration with a single member, ``MGC``, whose value is the empty string.

    NOTE(review): presumably a placeholder for optical register names;
    nothing in this file reads it -- confirm before extending.
    """

    MGC = ''
class EyeIssue(Enum):
    """Classification assigned to an eye diagram by ``EyeData.determine_issue``."""

    EYE_NORMAL = 0
    EYE_TOO_SMALL = 1  # vertical opening too small: |top| + |down| < 30
    EYE_TOO_LARGE = 2  # vertical opening too large: |top| + |down| > 220
    EYE_TOO_DOWN = 3   # eye sits too low: top < 15
    EYE_TOO_UP = 4     # eye sits too high: |down| < 15 (i.e. down > -15)


@dataclass
class EyeData:
    """One eye-diagram measurement for a single retimer lane.

    Every numeric field defaults to -1; callers in this file treat
    ``rt_phys_index == -1`` as "no valid measurement".  ``issue`` stays
    ``EYE_NORMAL`` until ``determine_issue`` is called.
    """

    # Classification thresholds used by determine_issue.  They carry no
    # annotation on purpose, so the dataclass machinery does not turn them
    # into fields and the constructor signature is unchanged.
    MIN_VERTICAL_AMPLITUDE = 30
    MAX_VERTICAL_AMPLITUDE = 220
    MIN_EDGE_MARGIN = 15

    rt_phys_index: int = -1    # retimer number as printed in the raw output
    phys_lane_index: int = -1  # lane number as printed in the raw output
    rt_index: int = -1         # derived retimer index
    lane_index: int = -1       # derived lane index
    down: float = -1           # mV (usually negative)
    top: float = -1            # mV (usually positive)
    left: float = -1           # UI
    right: float = -1          # UI
    issue: EyeIssue = EyeIssue.EYE_NORMAL

    def __str__(self):
        """Render the measurement in the same one-line layout the parser reads."""
        return (f"RTMR{self.rt_index} EYE_A{self.lane_index:02d}: "
                f"({self.down:.1f}, {self.top:.1f}) mV | "
                f"({self.left:.2f}, {self.right:.2f}) UI")

    @property
    def vertical_amplitude(self) -> float:
        """Return ``|down| + |top|``."""
        return abs(self.down) + abs(self.top)

    @property
    def horizontal_amplitude(self) -> float:
        """Return ``|left| + |right|``."""
        return abs(self.left) + abs(self.right)

    @property
    def quality_score(self) -> float:
        """Return the score used to rank measurements (the vertical amplitude)."""
        return self.vertical_amplitude

    def determine_issue(self) -> None:
        """Classify this measurement and store the result in ``self.issue``.

        Checks run in priority order: too small, too large, too low, too high.
        The first one that matches wins.
        """
        if self.vertical_amplitude < self.MIN_VERTICAL_AMPLITUDE:
            self.issue = EyeIssue.EYE_TOO_SMALL
        elif self.vertical_amplitude > self.MAX_VERTICAL_AMPLITUDE:
            self.issue = EyeIssue.EYE_TOO_LARGE
        elif self.top < self.MIN_EDGE_MARGIN:
            self.issue = EyeIssue.EYE_TOO_DOWN
        elif self.down > -self.MIN_EDGE_MARGIN:
            # down is negative, so a value above -15 means the lower edge
            # is too close to 0 (the eye sits too high).
            self.issue = EyeIssue.EYE_TOO_UP
        else:
            self.issue = EyeIssue.EYE_NORMAL
# --------------------------------------------------
# 🔧 EQ Tuning Tool
# --------------------------------------------------
class EqTuneTool:
    """Tune optical-link registers until retimer eye diagrams classify as normal.

    Eye diagrams are read from ``local_bmc`` with ``rtmr <n> eye 0 a``
    vendor commands, classified via ``EyeData.determine_issue`` and, for each
    problematic lane, a sequence of register sweeps is applied through
    ``local_reg_tool`` / ``remote_reg_tool``.  The retimer-lane to slot/lane
    mapping is loaded from ``./main_data/topo_mapping.yaml``.
    """

    # Maximum number of handler invocations per bad eye in process_worst_eyes.
    MAX_TUNE_RETRIES = 2

    # One line of eye output, e.g.
    # "RTMR3 EYE_A05: (-42.0, 45.5) mV | (-0.25, 0.30) UI".
    # Compiled once here instead of on every parse_eye_data call.
    _EYE_LINE_RE = re.compile(
        r"RTMR(\d+)\s+EYE_A(\d+):\s*"
        r"\((-?\d+\.?\d*),\s*(-?\d+\.?\d*)\)\s*mV\s*\|\s*"
        r"\((-?\d+\.?\d*),\s*(-?\d+\.?\d*)\)\s*UI"
    )

    def __init__(self, local_bmc: DevWhiteRiverExp, remote_bmc: DevWhiteRiverExp,
                 local_reg_tool: OptRegAccessTool, remote_reg_tool: OptRegAccessTool,
                 ibias_rssi_map: IbiasRssiMapParser, route_name: str):
        """Store the collaborators and load the topology mapping.

        The original spelled this ``init`` (the dunder underscores were lost),
        so constructing the tool with arguments could never work.
        """
        self.local_bmc = local_bmc
        self.remote_bmc = remote_bmc
        self.local_reg_tool = local_reg_tool
        self.remote_reg_tool = remote_reg_tool
        self.ibias_rssi_map = ibias_rssi_map
        self.route_name = route_name
        self.topo_map = TopoMappingParser("./main_data/topo_mapping.yaml")
        self.topo_map.parse()

    # ------------------------------------------------------------------
    # Top-level flow
    # ------------------------------------------------------------------
    def eq_auto_tune(self, exp_id: int, retimers: Optional[List[int]] = None):
        """Measure every requested retimer and tune the lanes that show issues.

        Args:
            exp_id: Expander id forwarded to every BMC / register call.
            retimers: Retimer indices to process.  Defaults to ``[3]``, the
                value that was hard-coded before this parameter existed.
        """
        raw_output = self.local_bmc.CmdVendorCommand(exp_id, 'ver')
        logger.info(raw_output)
        # NOTE(review): fixed slot 2 / lane 0 read whose result is only
        # logged -- looks like a bring-up diagnostic; confirm it is still wanted.
        raw_tia_peak_value = self.local_reg_tool.read_tia_peak_reg(exp_id, 2, 0)
        logger.info(f'Test Initial tia_peak_value: {raw_tia_peak_value}')
        logger.info(f"Starting auto tune for exp id: {exp_id}")

        if retimers is None:
            retimers = [3]
        for retimer_index in retimers:
            bad_eyes = self.get_issue_eye_diagram(exp_id, retimer_index)
            time.sleep(1)
            if bad_eyes:
                self.process_worst_eyes(exp_id, retimer_index, bad_eyes)
            else:
                logger.info(f"No eye issues found on RTMR{retimer_index}")

    def process_worst_eyes(self, exp_id: int, retimer_index: int, bad_eyes: List[EyeData]) -> None:
        """Log every bad eye, then run the matching handler on each of them.

        Each eye gets up to ``MAX_TUNE_RETRIES`` handler runs.  The handler's
        returned measurement drives the next iteration; the original dropped
        the return value, so the loop always re-ran on the stale issue.
        ``EYE_TOO_LARGE`` has no handler and is skipped with a warning.
        """
        logger.warning(f"Detected eye issues on RTMR{retimer_index}:")
        for eye in bad_eyes:
            logger.warning(
                f"RTMR {eye.rt_index} Lane {eye.lane_index:02d}: {eye.issue.name} "
                f"(Vertical: {eye.vertical_amplitude:.1f}mV, Top={eye.top:.1f}, Down={eye.down:.1f})"
            )

        handlers = {
            EyeIssue.EYE_TOO_SMALL: self.process_small_eye,
            EyeIssue.EYE_TOO_DOWN: self.process_down_eye,
            EyeIssue.EYE_TOO_UP: self.process_up_eye,
        }
        for eye in bad_eyes:
            logger.warning(
                f"Starting Process RTMR {eye.rt_index} Lane {eye.lane_index:02d}: {eye.issue.name} "
                f"(Vertical: {eye.vertical_amplitude:.1f}mV, Top={eye.top:.1f}, Down={eye.down:.1f})"
            )
            current = eye
            for try_count in range(self.MAX_TUNE_RETRIES):
                if current.issue == EyeIssue.EYE_NORMAL:
                    break
                logger.info(f'------try count: {try_count}')
                handler = handlers.get(current.issue)
                if handler is None:
                    logger.warning(f"No automatic handling for {current.issue.name}, skipping")
                    break
                current = handler(exp_id, retimer_index, current)

    # ------------------------------------------------------------------
    # Per-issue handlers
    # ------------------------------------------------------------------
    def _lookup_slots(self, retimer_index: int, eye: EyeData):
        """Return ``(remote_slot, remote_lane, local_slot, local_lane)`` or None.

        None is returned (after logging an error) when the topology map has
        no remote or no local entry for the eye's retimer lane.
        """
        remote_slot = self.topo_map.get_remote_slot_by_retimer(eye.rt_index, eye.lane_index)
        local_slot = self.topo_map.get_local_slot_by_retimer(eye.rt_index, eye.lane_index)
        if remote_slot is None or local_slot is None:
            logger.error(f"Invalid slot for RTMR{retimer_index} lane {eye.lane_index}")
            return None
        return remote_slot[0], remote_slot[1], local_slot[0], local_slot[1]

    def process_small_eye(self, exp_id: int, retimer_index: int, eye: EyeData) -> EyeData:
        """Handle EYE_TOO_SMALL: sweep ibias, then opcurrent, then mgc.

        Each later stage only runs while the eye is still too small.
        Returns the latest measurement (or ``eye`` unchanged if the slot
        lookup fails).
        """
        logger.info(f"Starting small eye issue processing for RTMR{retimer_index} lane {eye.lane_index}, adjusting ibias")
        slots = self._lookup_slots(retimer_index, eye)
        if slots is None:
            return eye
        remote_slot_id, remote_lane_id, local_slot_id, local_lane_id = slots
        logger.info(f"Processing small eye for RTMR{eye.rt_index} lane {eye.lane_index}: "
                    f"remote_slot=({remote_slot_id},{remote_lane_id}), "
                    f"local_slot=({local_slot_id},{local_lane_id})")

        logger.info(f"Adjusting ibias for RTMR{eye.rt_index} lane {eye.lane_index}")
        eye = self.adjust_ibias_for_small_eye(exp_id, eye, remote_slot_id, remote_lane_id,
                                              local_slot_id, local_lane_id)
        if eye.issue == EyeIssue.EYE_TOO_SMALL:
            logger.info(f"Eye still too small for RTMR{retimer_index} lane {eye.lane_index}, adjusting opcurrent")
            eye = self.adjust_opcurrent_for_small_down_eye(exp_id, eye, remote_slot_id, remote_lane_id)
        if eye.issue == EyeIssue.EYE_TOO_SMALL:
            logger.info(f"Eye still too small for RTMR{retimer_index} lane {eye.lane_index}, adjusting mgc")
            eye = self.adjust_mgc_for_small_eye(exp_id, eye, local_slot_id, local_lane_id)

        logger.info(f"Completed small eye processing for RTMR{retimer_index} lane {eye.lane_index}, "
                    f"final issue: {eye.issue.name}")
        return eye

    def process_down_eye(self, exp_id: int, retimer_index: int, eye: EyeData) -> EyeData:
        """Handle EYE_TOO_DOWN: lower tia_peak, then lower high_freq, then raise opcurrent.

        Each later stage only runs while the eye is still too low.
        """
        logger.info(f"Starting down eye issue processing for RTMR{retimer_index} lane {eye.lane_index}")
        slots = self._lookup_slots(retimer_index, eye)
        if slots is None:
            return eye
        remote_slot_id, remote_lane_id, local_slot_id, local_lane_id = slots
        logger.info(f"Processing down eye for RTMR{eye.rt_index} lane {eye.lane_index}: "
                    f"remote_slot=({remote_slot_id},{remote_lane_id}), "
                    f"local_slot=({local_slot_id},{local_lane_id})")

        logger.info(f"Adjusting tia_peak for RTMR{eye.rt_index} lane {eye.lane_index}")
        eye = self.adjust_tia_peak_for_down_eye(exp_id, eye, local_slot_id, local_lane_id)
        if eye.issue == EyeIssue.EYE_TOO_DOWN:
            logger.info(f"Eye still too down for RTMR{retimer_index} lane {eye.lane_index}, adjusting high_freq")
            eye = self.adjust_high_freq_for_down_eye(exp_id, eye, remote_slot_id, remote_lane_id)
        if eye.issue == EyeIssue.EYE_TOO_DOWN:
            logger.info(f"Eye still too down for RTMR{retimer_index} lane {eye.lane_index}, adjusting opcurrent")
            eye = self.adjust_opcurrent_for_small_down_eye(exp_id, eye, remote_slot_id, remote_lane_id)

        logger.info(f"Completed down eye processing for RTMR{retimer_index} lane {eye.lane_index}, "
                    f"final issue: {eye.issue.name}")
        return eye

    def process_up_eye(self, exp_id: int, retimer_index: int, eye: EyeData) -> EyeData:
        """Handle EYE_TOO_UP: raise tia_peak, then raise high_freq, then lower opcurrent.

        Each later stage only runs while the eye is still too high.
        """
        logger.info(f"Starting up eye issue processing for RTMR{retimer_index} lane {eye.lane_index}")
        slots = self._lookup_slots(retimer_index, eye)
        if slots is None:
            return eye
        remote_slot_id, remote_lane_id, local_slot_id, local_lane_id = slots
        logger.info(f"Processing up eye for RTMR{eye.rt_index} lane {eye.lane_index}: "
                    f"remote_slot=({remote_slot_id},{remote_lane_id}), "
                    f"local_slot=({local_slot_id},{local_lane_id})")

        logger.info(f"Adjusting tia_peak for RTMR{eye.rt_index} lane {eye.lane_index}")
        eye = self.adjust_tia_peak_for_up_eye(exp_id, eye, local_slot_id, local_lane_id)
        if eye.issue == EyeIssue.EYE_TOO_UP:
            logger.info(f"Eye still too up for RTMR{retimer_index} lane {eye.lane_index}, adjusting high_freq")
            eye = self.adjust_high_freq_for_up_eye(exp_id, eye, remote_slot_id, remote_lane_id)
        if eye.issue == EyeIssue.EYE_TOO_UP:
            logger.info(f"Eye still too up for RTMR{retimer_index} lane {eye.lane_index}, adjusting opcurrent")
            eye = self.adjust_opcurrent_for_up_eye(exp_id, eye, remote_slot_id, remote_lane_id)

        logger.info(f"Completed up eye processing for RTMR{retimer_index} lane {eye.lane_index}, "
                    f"final issue: {eye.issue.name}")
        return eye

    # ------------------------------------------------------------------
    # Register sweeps
    # ------------------------------------------------------------------
    def _remeasure(self, exp_id: int, eye: EyeData) -> Optional[EyeData]:
        """Re-measure the lane ``eye`` came from; return None if that fails.

        ``get_eye_diagram_per_lane`` signals failure with a default
        ``EyeData()`` whose issue is EYE_NORMAL.  Passing that on would make a
        failed measurement look like a fixed eye and lose the lane indices,
        so it is turned into None here.
        """
        measured = self.get_eye_diagram_per_lane(exp_id, eye.rt_phys_index, eye.phys_lane_index)
        if measured.rt_phys_index == -1:
            logger.error(f"Re-measurement failed for RTMR{eye.rt_phys_index} Lane{eye.phys_lane_index}")
            return None
        return measured

    def _sweep_register(self, exp_id: int, eye: EyeData, name: str, read_reg, write_reg,
                        slot_id: int, lane_id: int, step: int, limit: int,
                        target_issue: EyeIssue, rollback: bool) -> EyeData:
        """Step one register towards ``limit`` until the eye leaves ``target_issue``.

        Args:
            name: Register name used in log messages.
            read_reg / write_reg: Bound register accessors, called as
                ``read_reg(exp_id, slot_id, lane_id)`` and
                ``write_reg(exp_id, slot_id, lane_id, value)``.
            step: Signed increment; its sign selects the sweep direction.
            limit: Last value that may be written (steps are clamped to it).
            target_issue: Issue being fixed; the sweep stops once the
                re-measured eye reports anything else.
            rollback: When True, restore the initial register value if the
                eye still reports ``target_issue`` after the sweep.

        Returns:
            The latest valid measurement (``eye`` itself if none succeeded).
        """
        raw_value = read_reg(exp_id, slot_id, lane_id)
        logger.info(f'Initial {name}_value: {raw_value}')

        increasing = step > 0
        value = raw_value
        while (value < limit) if increasing else (value > limit):
            value = min(value + step, limit) if increasing else max(value + step, limit)
            write_reg(exp_id, slot_id, lane_id, value)
            logger.info(f'Adjusted {name} to {value}')

            measured = self._remeasure(exp_id, eye)
            if measured is None:
                break  # cannot judge the effect; keep the last valid eye
            eye = measured
            logger.info(f'After {name} adjustment, eye issue: {eye.issue.name}')
            if eye.issue != target_issue or value == limit:
                break

        if not rollback:
            logger.info(f'Final {name} adjustment result: {name}={value}, eye issue={eye.issue.name}')
        elif eye.issue == target_issue:
            write_reg(exp_id, slot_id, lane_id, raw_value)
            logger.info(f'No improvement, rollback {name} to {raw_value}')
        else:
            logger.info(f'{name} adjustment successful, final value: {value}')
        return eye

    def adjust_ibias_for_small_eye(self, exp_id: int, eye: EyeData, remote_slot_id: int, remote_lane_id: int,
                                   local_slot_id: int, local_lane_id: int) -> EyeData:
        """Raise the remote ibias register while watching the local RSSI readings.

        ibias is stepped up (300 below 2500, otherwise 150) and never written
        above 3200.  After each step both RSSI lanes mapped to this ibias lane
        are read; if either exceeds 10000 the step is undone and the sweep
        stops.  The sweep also stops once the eye is no longer too small or a
        re-measurement fails.
        """
        ibias_max = 3200
        rssi_max = 10000

        ibias_value = self.remote_reg_tool.read_ibias_reg(exp_id, remote_slot_id, remote_lane_id)
        logger.info(f'Initial ibias_value: {ibias_value}')
        if ibias_value == 0:
            # NOTE(review): 0 is treated as a suspicious read and retried once;
            # if it is still 0 the sweep proceeds from 0 as before -- confirm.
            logger.warning(f'read ibias reg again')
            ibias_value = self.remote_reg_tool.read_ibias_reg(exp_id, remote_slot_id, remote_lane_id)
            logger.info(f'Initial ibias_value: {ibias_value}')

        rssi_lanes = None
        if ibias_value < ibias_max:
            # Resolve the RSSI lanes before touching the register: the
            # original wrote one step first and only then found out that it
            # had no RSSI reading to guard it.
            ibias_lane = self.remote_reg_tool.get_ibias_by_logic_lane(remote_lane_id)
            rssi_lanes = self.ibias_rssi_map.get_rssi_lane(self.route_name, ibias_lane)
            if rssi_lanes is None:
                logger.error('RSSI lanes is none')

        while rssi_lanes is not None and ibias_value < ibias_max:
            previous_value = ibias_value
            step = 300 if previous_value < 2500 else 150
            ibias_value = min(previous_value + step, ibias_max)  # never overshoot the ceiling
            self.remote_reg_tool.write_ibias_reg(exp_id, remote_slot_id, remote_lane_id, ibias_value)
            logger.info(f'Adjusted ibias to {ibias_value} with step {ibias_value - previous_value}')

            rssi1_value = self.local_reg_tool.read_rssi_reg(exp_id, local_slot_id, rssi_lanes[0])
            rssi2_value = self.local_reg_tool.read_rssi_reg(exp_id, local_slot_id, rssi_lanes[1])
            logger.info(f'RSSI values: rssi1={rssi1_value}, rssi2={rssi2_value}')

            rssi_exceeded = rssi1_value > rssi_max or rssi2_value > rssi_max
            if rssi_exceeded:
                # Undo this step and remember the value actually in the register.
                self.remote_reg_tool.write_ibias_reg(exp_id, remote_slot_id, remote_lane_id, previous_value)
                ibias_value = previous_value
                logger.info(f'RSSI margin exceeded, rollback ibias to {previous_value}')

            measured = self._remeasure(exp_id, eye)
            if measured is None:
                break
            eye = measured
            logger.info(f'After ibias adjustment, eye issue: {eye.issue.name}')
            if eye.issue != EyeIssue.EYE_TOO_SMALL or rssi_exceeded:
                break

        logger.info(f'Final ibias adjustment result: ibias={ibias_value}, eye issue={eye.issue.name}')
        return eye

    def adjust_opcurrent_for_small_down_eye(self, exp_id: int, eye: EyeData, remote_slot_id: int, remote_lane_id: int) -> EyeData:
        """Raise remote opcurrent in steps of 10 up to 190; restore it if the issue is unchanged."""
        return self._sweep_register(
            exp_id, eye, 'opcurrent',
            self.remote_reg_tool.read_opcurrent_reg, self.remote_reg_tool.write_opcurrent_reg,
            remote_slot_id, remote_lane_id,
            step=10, limit=190, target_issue=eye.issue, rollback=True)

    def adjust_mgc_for_small_eye(self, exp_id: int, eye: EyeData, local_slot_id: int, local_lane_id: int) -> EyeData:
        """Raise local mgc in steps of 2, at most 10 above its initial value.

        Returns as soon as the eye is no longer too small; otherwise the
        initial register value is restored.
        """
        raw_mgc_value = self.local_reg_tool.read_mgc_reg(exp_id, local_slot_id, local_lane_id)
        logger.info(f'Initial mgc_value: {raw_mgc_value}')
        mgc_limit = raw_mgc_value + 10
        mgc_value = raw_mgc_value
        while mgc_value < mgc_limit:
            mgc_value += 2
            self.local_reg_tool.write_mgc_reg(exp_id, local_slot_id, local_lane_id, mgc_value)
            logger.info(f'Adjusted mgc to {mgc_value}')
            measured = self._remeasure(exp_id, eye)
            if measured is None:
                break  # cannot verify the effect; fall through to the rollback
            eye = measured
            logger.info(f'After mgc adjustment, eye issue: {eye.issue.name}')
            if eye.issue != EyeIssue.EYE_TOO_SMALL:
                logger.info(f'mgc adjustment successful, final value: {mgc_value}')
                return eye
        # No improvement: restore the original value.
        self.local_reg_tool.write_mgc_reg(exp_id, local_slot_id, local_lane_id, raw_mgc_value)
        logger.info(f'No improvement, rollback mgc to {raw_mgc_value}')
        return eye

    def adjust_tia_peak_for_down_eye(self, exp_id: int, eye: EyeData, local_slot_id: int, local_lane_id: int) -> EyeData:
        """Lower local tia_peak in steps of 20 down to 0 while the eye is too low."""
        return self._sweep_register(
            exp_id, eye, 'tia_peak',
            self.local_reg_tool.read_tia_peak_reg, self.local_reg_tool.write_tia_peak_reg,
            local_slot_id, local_lane_id,
            step=-20, limit=0, target_issue=EyeIssue.EYE_TOO_DOWN, rollback=False)

    def adjust_high_freq_for_down_eye(self, exp_id: int, eye: EyeData, remote_slot_id: int, remote_lane_id: int) -> EyeData:
        """Lower remote high_freq in steps of 20 down to 0 while the eye is too low."""
        return self._sweep_register(
            exp_id, eye, 'high_freq',
            self.remote_reg_tool.read_high_freq_reg, self.remote_reg_tool.write_high_freq_reg,
            remote_slot_id, remote_lane_id,
            step=-20, limit=0, target_issue=EyeIssue.EYE_TOO_DOWN, rollback=False)

    def adjust_tia_peak_for_up_eye(self, exp_id: int, eye: EyeData, local_slot_id: int, local_lane_id: int) -> EyeData:
        """Raise local tia_peak in steps of 20 up to 200 while the eye is too high."""
        return self._sweep_register(
            exp_id, eye, 'tia_peak',
            self.local_reg_tool.read_tia_peak_reg, self.local_reg_tool.write_tia_peak_reg,
            local_slot_id, local_lane_id,
            step=20, limit=200, target_issue=EyeIssue.EYE_TOO_UP, rollback=False)

    def adjust_high_freq_for_up_eye(self, exp_id: int, eye: EyeData, remote_slot_id: int, remote_lane_id: int) -> EyeData:
        """Raise remote high_freq in steps of 20 up to 150 while the eye is too high."""
        return self._sweep_register(
            exp_id, eye, 'high_freq',
            self.remote_reg_tool.read_high_freq_reg, self.remote_reg_tool.write_high_freq_reg,
            remote_slot_id, remote_lane_id,
            step=20, limit=150, target_issue=EyeIssue.EYE_TOO_UP, rollback=False)

    def adjust_opcurrent_for_up_eye(self, exp_id: int, eye: EyeData, remote_slot_id: int, remote_lane_id: int) -> EyeData:
        """Lower remote opcurrent in steps of 10 down to 0; restore it if the issue is unchanged."""
        return self._sweep_register(
            exp_id, eye, 'opcurrent',
            self.remote_reg_tool.read_opcurrent_reg, self.remote_reg_tool.write_opcurrent_reg,
            remote_slot_id, remote_lane_id,
            step=-10, limit=0, target_issue=eye.issue, rollback=True)

    # ------------------------------------------------------------------
    # Measurement
    # ------------------------------------------------------------------
    def get_issue_eye_diagram(self, exp_id: int, retimer_index: int, attempts: int = 1) -> List[EyeData]:
        """Measure all lanes of a retimer and return the ones that are not normal.

        With ``attempts > 1`` each lane is measured repeatedly and
        ``determine_issue`` picks one representative measurement per lane.
        """
        # Keyed by the physical lane index.  The derived ``lane_index`` is
        # folded to 0-7 for both halves in parse_eye_data, so keying on it
        # merged physical lanes k and k+8 and silently dropped one of them.
        all_measurements: DefaultDict[int, List[EyeData]] = defaultdict(list)
        for attempt in range(1, attempts + 1):
            logger.info(f"RTMR{retimer_index} measurement attempt {attempt}/{attempts}")
            eye_data_list = self.get_eye_diagram(exp_id, retimer_index)
            if not eye_data_list:
                logger.error(f"Attempt {attempt} failed for RTMR{retimer_index}")
                continue
            for eye_data in eye_data_list:
                all_measurements[eye_data.phys_lane_index].append(eye_data)

        if not all_measurements:
            logger.error(f"No valid measurements for RTMR{retimer_index}")
            return []

        worst_eyes: List[EyeData] = []
        for phys_lane, measurements in all_measurements.items():
            logger.debug(f'---------- phys_lane: {phys_lane}, measurements: {measurements}')
            worst_eyes.append(self.determine_issue(measurements))
        return [eye for eye in worst_eyes if eye.issue != EyeIssue.EYE_NORMAL]

    def determine_issue(self, measurements: List[EyeData]):
        """Pick the representative measurement from several readings of one lane.

        The reading with the smallest quality score is chosen, unless that
        reading is normal or too large, in which case the reading with the
        largest score is returned instead.  Intended priority (per the
        original note): small, large, up, down -- size problems are assumed
        to be fixed before offset problems.
        """
        worst_eye = min(measurements, key=lambda x: x.quality_score)
        if worst_eye.issue in [EyeIssue.EYE_NORMAL, EyeIssue.EYE_TOO_LARGE]:
            worst_eye = max(measurements, key=lambda x: x.quality_score)
        return worst_eye

    def _measure_with_retries(self, exp_id: int, retimer_phys_index: int, phys_lane: int,
                              attempts: int, measure_once) -> EyeData:
        """Call ``measure_once`` up to ``attempts`` times and summarise the result.

        Returns immediately with the first measurement that reports an issue.
        If every valid measurement is normal, ``determine_issue`` selects the
        one to return.  A default ``EyeData()`` is returned when no attempt
        produced a valid measurement.
        """
        measurements: List[EyeData] = []
        for attempt in range(1, attempts + 1):
            try:
                logger.info(f"Attempt {attempt}/{attempts} for RTMR{retimer_phys_index} Lane{phys_lane}")
                eye_data = measure_once(exp_id, retimer_phys_index, phys_lane)
                if eye_data.rt_phys_index == -1:
                    logger.warning(f"Invalid measurement in attempt {attempt}")
                    continue
                measurements.append(eye_data)
                logger.info(f"Measurement {attempt}: {eye_data}")
                if eye_data.issue != EyeIssue.EYE_NORMAL:
                    logger.info(f"Eye issue detected: {eye_data.issue.name}, returning immediately")
                    return eye_data
            except Exception as e:
                # Broad on purpose: the failure modes of the BMC layer are not
                # visible from here, and one bad attempt must not abort the rest.
                logger.error(f"Attempt {attempt} failed: {str(e)}")

        if not measurements:
            logger.error(f"No valid measurements obtained after {attempts} attempts")
            return EyeData()

        worst_eye = self.determine_issue(measurements)
        logger.warning(f"Worst eye diagram for RTMR{retimer_phys_index} Lane{phys_lane}: "
                       f"Quality score: {worst_eye.quality_score:.2f}")
        return worst_eye

    def get_worst_eye_diagram_per_lane(self, exp_id: int, retimer_phys_index: int,
                                       phys_lane: int, attempts: int = 3) -> EyeData:
        """Repeat ``get_eye_diagram_per_lane`` up to ``attempts`` times.

        Each repetition is itself a retried measurement, so a lane may be
        read up to ``attempts * 3`` times.
        """
        return self._measure_with_retries(exp_id, retimer_phys_index, phys_lane, attempts,
                                          self.get_eye_diagram_per_lane)

    def get_eye_diagram_per_lane(self, exp_id: int, retimer_phys_index: int,
                                 phys_lane: int, attempts: int = 3) -> EyeData:
        """Measure one lane up to ``attempts`` times.

        Returns the first measurement with an issue, otherwise the one chosen
        by ``determine_issue``, or a default ``EyeData()``
        (``rt_phys_index == -1``) when every attempt failed.
        """
        return self._measure_with_retries(exp_id, retimer_phys_index, phys_lane, attempts,
                                          self.get_eye_diagram_per_lane_1_count)

    def get_eye_diagram_per_lane_1_count(self, exp_id: int, retimer_phys_index: int, phys_lane: int) -> EyeData:
        """Run one single-lane eye command and return its first parsed entry.

        Returns a default ``EyeData()`` when the command fails, returns no
        output, or the output contains no parsable eye line.
        """
        cmd = f"rtmr {retimer_phys_index} eye 0 a {phys_lane}"
        try:
            raw_output = self.local_bmc.CmdVendorCommand(exp_id, cmd)
            if not raw_output:
                logger.error(f"No data returned from command: {cmd}")
                return EyeData()
            eye_datas = self.parse_eye_data(raw_output)
            if not eye_datas:
                # Was an IndexError swallowed by the except below and
                # misreported as a command execution error.
                logger.error(f"No eye data parsed from output of command: {cmd}")
                return EyeData()
            return eye_datas[0]
        except Exception as e:
            logger.error(f"Error executing command '{cmd}': {e}")
            return EyeData()

    def get_eye_diagram(self, exp_id: int, retimer_index: int) -> List[EyeData]:
        """Run the all-lane eye command for a retimer; return [] on any failure."""
        cmd = f"rtmr {retimer_index} eye 0 a"
        try:
            raw_output = self.local_bmc.CmdVendorCommand(exp_id, cmd)
            if not raw_output:
                logger.error(f"No data returned from command: {cmd}")
                return []
            return self.parse_eye_data(raw_output)
        except Exception as e:
            logger.error(f"Error executing command '{cmd}': {e}")
            return []

    def parse_eye_data(self, text: str) -> List[EyeData]:
        """Parse eye command output into classified ``EyeData`` objects.

        Blank lines, lines containing "error" (case-insensitive) and lines
        that do not match the eye pattern are skipped.  For each match:

        * ``rt_phys_index`` / ``phys_lane_index`` are the numbers as printed;
        * entries 0-7 get ``rt_index = <retimer>1`` and the same lane index,
          entries 8 and above get ``rt_index = <retimer>2`` and
          ``lane_index = entry - 8`` (digits are concatenated, e.g. 3 -> 31 / 32).
        """
        results = []
        for line_num, line in enumerate(text.strip().splitlines(), 1):
            line = line.strip()
            if not line or "error" in line.lower():
                continue

            # search (not match) tolerates extra characters around the entry
            match = self._EYE_LINE_RE.search(line)
            if not match:
                logger.debug(f"Line {line_num}: Skipped (no match) - {line}")
                continue

            try:
                rtmr_prefix = match.group(1)
                eye_index = int(match.group(2))
                down, top, left, right = (float(match.group(i)) for i in range(3, 7))

                if eye_index < 8:
                    rt_index = int(f"{rtmr_prefix}1")
                    lane_index = eye_index
                else:
                    rt_index = int(f"{rtmr_prefix}2")
                    lane_index = eye_index - 8

                eye_data = EyeData(
                    rt_phys_index=int(rtmr_prefix),
                    phys_lane_index=eye_index,
                    rt_index=rt_index,
                    lane_index=lane_index,
                    down=down,
                    top=top,
                    left=left,
                    right=right,
                )
                eye_data.determine_issue()  # classify right away
                results.append(eye_data)
            except Exception as e:
                logger.error(f"Failed to parse line {line_num}: {line} | Error: {e}")
                continue

        logger.info(f"Parsed {len(results)} valid eye diagrams.")
        return results
ocsdiag.py
def process_host(host, reg_table_file, exp_list, slot_list, lane_list, cmd, param, reg_wt_value, sl_file): logging.info(f'-----------process: {host}')
for exp_id in exp_list:
exp_id = int(exp_id)
exp_util = SmbusHttpUtil(f"https://{host}", "root", "0penBmc", host)
exp_util.lock(exp_id)
logging.info(f'----------process_host {host} lock')
vuart_util = VuartUtil(host)
bmc = DevWhiteRiverExp(exp_util, vuart_util)
route_info = bmc.GetOpticalRouteStatus(exp_id, 0)
logging.info(f'--------GetOpticalRouteStatus route_info:{route_info}')
yaml_route_map = {
'1111111111111111': '786-1-oneta',
'1212121212121212': '786-1-onetb',
'0201040306050807': '786-1-onoc1',
'0403020108070605': '786-1-onoc2',
'0807060504030201': '786-1-onoc3',
'0304010207080506': '786-1-onoc4',
'0605080702010403': '786-1-onoc5',
'0708050603040102': '786-1-onoc6',
'0506070801020304': '786-1-onoc7',
}
reg_table_file = f'main_data/{yaml_route_map[route_info]}.yaml'
route_name = yaml_route_map[route_info]
logging.info(f'input reg_table_file is empty, use default config file:{reg_table_file}, route_name:{route_name}')
reg_access_tool = OptRegAccessTool(host, reg_table_file, bmc)
logging.info(f'init reg_access_tool with reg_table_file:{reg_table_file}, route_name:{route_name}')
remote_bmc = None
remote_exp_util = exp_util
remote_host = host
remote_reg_access_tool = reg_access_tool
if cmd in ['ibias-auto-tune', 'eq-auto-tune'] and param != '':
logging.info(f'------- remote_host: {param}')
remote_host = param
remote_exp_util = SmbusHttpUtil(f"https://{remote_host}", "root", "0penBmc", remote_host)
remote_bmc = DevWhiteRiverExp(remote_exp_util, vuart_util)
remote_exp_util.lock(exp_id)
logging.info(f'----------process dst host {remote_host} lock')
remote_reg_access_tool = OptRegAccessTool(remote_host, reg_table_file, remote_bmc)
if cmd in ['eq-auto-tune']:
slot_list = [0,1,2,3,4,5,6,7]