# xz_cable_setup_check_tool.py build ```shell pyinstaller --onefile \ --paths=/home/gyou/NexusBench-baihe-br/nexusbench \ --paths=/home/gyou/NexusBench-baihe-br/nexusbench/log \ --paths=/home/gyou/NexusBench-baihe-br/nexusbench/connection \ --paths=/home/gyou/NexusBench-baihe-br/nexusbench/gpu/biren \ --hidden-import=connection.http_helper \ --runtime-tmpdir=/home/gyou/tmp \ /home/gyou/NexusBench-baihe-br/nexusbench/biren_test.py # 加了这一行就可以运行了 --runtime-tmpdir=/home/gyou/tmp \ --hidden-import=log.logger \ --hidden-import=connection.ssh_connection_manager \ ``` ./build/whiteriver_exp --host 10.57.216.109 --exp 2 --cmd vcmd --param rev ./vuart -i 10.57.216.109 -e 2 -c vcmd -p ver ./vuart -i 10.57.216.108 -e 4 -c fw-down -p ./build/whiteriver_exp -i 10.57.216.108 -e 4 -c fw-down -p ./build/whiteriver_exp -i 10.57.216.91 -e 4 -c vcmd -p ver net use X: \\10.57.216.173\shared /user:root ossdbg1 PicT1!2@3#4$ RCms@Zte3 ./build/whiteriver_exp -i 10.57.216.94 -e 4 -c fw-down -p "./whiteriver_exp@1.0.17+2508161844.img" ./build/whiteriver_exp -i 10.57.216.95,10.57.216.96,10.57.216.97,10.57.216.98 scp /usr/bin/ocs_link_reset root@10.57.216.166 scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.166:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.165:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.187:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.148:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.163:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.139:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.173:/usr/bin/ scp -o "BatchMode=no" -o 
"StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.167:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.134:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.145:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.176:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.180:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.185:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.150:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.168:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.174:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.132:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.189:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.151:/usr/bin/ scp -o "BatchMode=no" -o "StrictHostKeyChecking=no" -i pass.txt /usr/bin/ocs_link_reset root@10.57.216.156:/usr/bin/ from dataclasses import dataclass import re from typing import List, Optional, Dict, DefaultDict import time from enum import Enum import logging from collections import defaultdict from parser.topology_parser import TopoMappingParser from parser.transceiver_config_parser import TransceiverConfigParser from toolbox.opt_reg_access_tool import OptRegAccessTool from gpu.biren.exp_util import DevWhiteRiverExp from parser.ibias_rssi_map_parser import IbiasRssiMapParser logger = 
logging.getLogger(__name__)


# --------------------------------------------------
# Eye Issue Definition (Corrected Logic)
# --------------------------------------------------
class OptReg(Enum):
    """Optical register identifiers (a single placeholder entry so far)."""

    MGC = ''


class EyeIssue(Enum):
    """Classification assigned to a measured eye diagram."""

    EYE_NORMAL = 0
    EYE_TOO_SMALL = 1  # vertical opening too small: |top| + |down| < 30
    EYE_TOO_LARGE = 2  # vertical opening too large: |top| + |down| > 220
    EYE_TOO_DOWN = 3  # eye sits too low: top < 15
    EYE_TOO_UP = 4  # eye sits too high: |down| < 15 (i.e. down > -15)


@dataclass
class EyeData:
    """One eye-diagram measurement of a single retimer lane.

    ``rt_phys_index``/``phys_lane_index`` are the indices used in the
    ``rtmr <n> eye 0 a <lane>`` command; ``rt_index``/``lane_index`` are the
    derived indices used for the topology lookup.  A record whose
    ``rt_phys_index`` is still ``-1`` was not filled from a real measurement.
    """

    # Classification thresholds in mV (not dataclass fields: no annotation).
    MIN_VERTICAL_MV = 30
    MAX_VERTICAL_MV = 220
    MIN_HALF_OPENING_MV = 15

    rt_phys_index: int = -1
    phys_lane_index: int = -1
    rt_index: int = -1
    lane_index: int = -1
    down: float = -1  # mV (usually negative)
    top: float = -1  # mV (usually positive)
    left: float = -1  # UI
    right: float = -1  # UI
    issue: EyeIssue = EyeIssue.EYE_NORMAL

    def __str__(self):
        return (f"RTMR{self.rt_index} EYE_A{self.lane_index:02d}: "
                f"({self.down:.1f}, {self.top:.1f}) mV | "
                f"({self.left:.2f}, {self.right:.2f}) UI")

    @property
    def is_valid(self) -> bool:
        """True when the record was populated from a parsed measurement."""
        return self.rt_phys_index != -1

    @property
    def vertical_amplitude(self) -> float:
        """Total vertical opening: ``|down| + |top|``."""
        return abs(self.down) + abs(self.top)

    @property
    def horizontal_amplitude(self) -> float:
        """Total horizontal opening: ``|left| + |right|``."""
        return abs(self.left) + abs(self.right)

    @property
    def quality_score(self) -> float:
        """Score used to rank measurements; currently the vertical opening."""
        return self.vertical_amplitude

    def determine_issue(self) -> None:
        """Classify this measurement and store the result in ``self.issue``.

        Size problems take priority over position problems, so an eye that is
        both small and off-centre is reported as ``EYE_TOO_SMALL``.
        """
        if self.vertical_amplitude < self.MIN_VERTICAL_MV:
            self.issue = EyeIssue.EYE_TOO_SMALL
        elif self.vertical_amplitude > self.MAX_VERTICAL_MV:
            self.issue = EyeIssue.EYE_TOO_LARGE
        elif self.top < self.MIN_HALF_OPENING_MV:
            self.issue = EyeIssue.EYE_TOO_DOWN
        elif self.down > -self.MIN_HALF_OPENING_MV:
            # down is negative, so > -15 means the lower edge is too close to 0.
            self.issue = EyeIssue.EYE_TOO_UP
        else:
            self.issue = EyeIssue.EYE_NORMAL


# --------------------------------------------------
# 🔧 EQ Tuning Tool
# --------------------------------------------------
class EqTuneTool:
    """Measure retimer eye diagrams and adjust optical registers to fix them.

    Eye diagrams are read through ``local_bmc``.  ibias, opcurrent and
    high_freq are written through ``remote_reg_tool``; tia_peak, mgc and the
    RSSI read-back go through ``local_reg_tool``.
    """

    # Matches e.g. "RTMR3 EYE_A09: (-40.0, 45.5) mV | (-0.25, 0.30) UI".
    _EYE_LINE_RE = re.compile(
        r"RTMR(\d+)\s+EYE_A(\d+):\s*"
        r"\((-?\d+\.?\d*),\s*(-?\d+\.?\d*)\)\s*mV\s*\|\s*"
        r"\((-?\d+\.?\d*),\s*(-?\d+\.?\d*)\)\s*UI"
    )
    # How many tuning passes process_worst_eyes spends on one lane.
    _MAX_TUNE_PASSES = 2

    def __init__(self,
                 local_bmc: "DevWhiteRiverExp",
                 remote_bmc: "DevWhiteRiverExp",
                 local_reg_tool: "OptRegAccessTool",
                 remote_reg_tool: "OptRegAccessTool",
                 ibias_rssi_map: "IbiasRssiMapParser",
                 route_name: str,
                 topo_map_path: str = "./main_data/topo_mapping.yaml"):
        """Store the device handles and load the topology mapping.

        Args:
            local_bmc: Device used to run vendor commands (eye measurements).
            remote_bmc: Device handle of the peer side (stored only).
            local_reg_tool: Register access for tia_peak, mgc and RSSI.
            remote_reg_tool: Register access for ibias, opcurrent, high_freq.
            ibias_rssi_map: Maps an ibias lane to the RSSI lanes to watch.
            route_name: Route key passed to ``ibias_rssi_map.get_rssi_lane``.
            topo_map_path: Topology mapping YAML; defaults to the original
                hard-coded location.
        """
        self.local_bmc = local_bmc
        self.remote_bmc = remote_bmc
        self.local_reg_tool = local_reg_tool
        self.remote_reg_tool = remote_reg_tool
        self.ibias_rssi_map = ibias_rssi_map
        self.route_name = route_name
        self.topo_map = TopoMappingParser(topo_map_path)
        self.topo_map.parse()

    def eq_auto_tune(self, exp_id: int, retimers: Optional[List[int]] = None):
        """Measure each retimer and tune every lane that shows an eye issue.

        Args:
            exp_id: Expander id forwarded to every device/register call.
            retimers: Retimer indices to process; defaults to ``[3]``, the
                value that was previously hard-coded.
        """
        if retimers is None:
            retimers = [3]

        # Diagnostic reads kept from the original flow: firmware version and
        # one tia_peak register, both only logged.
        raw_output = self.local_bmc.CmdVendorCommand(exp_id, 'ver')
        logger.info(raw_output)
        raw_tia_peak_value = self.local_reg_tool.read_tia_peak_reg(exp_id, 2, 0)
        logger.info(f'Test Initial tia_peak_value: {raw_tia_peak_value}')

        logger.info(f"Starting auto tune for exp id: {exp_id}")
        for retimer_index in retimers:
            bad_eyes = self.get_issue_eye_diagram(exp_id, retimer_index)
            time.sleep(1)
            if bad_eyes:
                self.process_worst_eyes(exp_id, retimer_index, bad_eyes)
            else:
                logger.info(f"No eye issues found on RTMR{retimer_index}")

    def process_worst_eyes(self, exp_id: int, retimer_index: int, bad_eyes: List[EyeData]) -> None:
        """Run up to ``_MAX_TUNE_PASSES`` tuning passes on every bad lane.

        Each pass continues from the eye returned by the previous handler, so
        a lane that becomes normal is not tuned again and a lane whose issue
        changes (e.g. small -> down) gets the matching handler.
        """
        logger.warning(f"Detected eye issues on RTMR{retimer_index}:")
        for eye in bad_eyes:
            logger.warning(
                f"RTMR {eye.rt_index} Lane {eye.lane_index:02d}: {eye.issue.name} "
                f"(Vertical: {eye.vertical_amplitude:.1f}mV, Top={eye.top:.1f}, Down={eye.down:.1f})"
            )

        for eye in bad_eyes:
            logger.warning(
                f"Starting Process RTMR {eye.rt_index} Lane {eye.lane_index:02d}: {eye.issue.name} "
                f"(Vertical: {eye.vertical_amplitude:.1f}mV, Top={eye.top:.1f}, Down={eye.down:.1f})"
            )
            current = eye
            for try_count in range(self._MAX_TUNE_PASSES):
                if current.issue == EyeIssue.EYE_NORMAL:
                    break
                if current.issue == EyeIssue.EYE_TOO_LARGE:
                    # TODO: no automatic correction yet.  Idea from the
                    # original notes: when RSSI is high, reduce ibias on the
                    # remote slot of this lane.
                    logger.warning(
                        f"RTMR {current.rt_index} Lane {current.lane_index:02d}: "
                        f"no automatic handling for {current.issue.name}"
                    )
                    break
                logger.info(f'------try count: {try_count}')
                # Size first, then position: a small eye is fixed before the
                # up/down check runs on the re-measured result.
                if current.issue == EyeIssue.EYE_TOO_SMALL:
                    current = self.process_small_eye(exp_id, retimer_index, current)
                if current.issue == EyeIssue.EYE_TOO_DOWN:
                    current = self.process_down_eye(exp_id, retimer_index, current)
                elif current.issue == EyeIssue.EYE_TOO_UP:
                    current = self.process_up_eye(exp_id, retimer_index, current)

    def _resolve_slots(self, kind: str, retimer_index: int, eye: EyeData):
        """Return (remote_slot, remote_lane, local_slot, local_lane) or None."""
        remote_slot = self.topo_map.get_remote_slot_by_retimer(eye.rt_index, eye.lane_index)
        local_slot = self.topo_map.get_local_slot_by_retimer(eye.rt_index, eye.lane_index)
        if remote_slot is None or local_slot is None:
            logger.error(f"Invalid slot for RTMR{retimer_index} lane {eye.lane_index}")
            return None
        logger.info(f"Processing {kind} eye for RTMR{eye.rt_index} lane {eye.lane_index}: "
                    f"remote_slot=({remote_slot[0]},{remote_slot[1]}), "
                    f"local_slot=({local_slot[0]},{local_slot[1]})")
        return remote_slot[0], remote_slot[1], local_slot[0], local_slot[1]

    def process_small_eye(self, exp_id: int, retimer_index: int, eye: EyeData):
        """Fix a too-small eye: ibias, then opcurrent, then mgc.

        Returns the last measured eye, or ``eye`` unchanged when the lane has
        no slot mapping.
        """
        logger.info(f"Starting small eye issue processing for RTMR{retimer_index} lane {eye.lane_index}, adjusting ibias")
        slots = self._resolve_slots("small", retimer_index, eye)
        if slots is None:
            return eye
        remote_slot_id, remote_lane_id, local_slot_id, local_lane_id = slots

        logger.info(f"Adjusting ibias for RTMR{eye.rt_index} lane {eye.lane_index}")
        eye = self.adjust_ibias_for_small_eye(exp_id, eye, remote_slot_id, remote_lane_id,
                                              local_slot_id, local_lane_id)

        if eye.issue == EyeIssue.EYE_TOO_SMALL:
            logger.info(f"Eye still too small for RTMR{retimer_index} lane {eye.lane_index}, adjusting opcurrent")
            eye = self.adjust_opcurrent_for_small_down_eye(exp_id, eye, remote_slot_id, remote_lane_id)

        if eye.issue == EyeIssue.EYE_TOO_SMALL:
            logger.info(f"Eye still too small for RTMR{retimer_index} lane {eye.lane_index}, adjusting mgc")
            eye = self.adjust_mgc_for_small_eye(exp_id, eye, local_slot_id, local_lane_id)

        logger.info(f"Completed small eye processing for RTMR{retimer_index} lane {eye.lane_index}, "
                    f"final issue: {eye.issue.name}")
        return eye

    def process_down_eye(self, exp_id: int, retimer_index: int, eye: EyeData):
        """Fix a too-low eye: lower tia_peak, lower high_freq, raise opcurrent.

        Returns the last measured eye, or ``eye`` unchanged when the lane has
        no slot mapping.
        """
        logger.info(f"Starting down eye issue processing for RTMR{retimer_index} lane {eye.lane_index}")
        slots = self._resolve_slots("down", retimer_index, eye)
        if slots is None:
            return eye
        remote_slot_id, remote_lane_id, local_slot_id, local_lane_id = slots

        logger.info(f"Adjusting tia_peak for RTMR{eye.rt_index} lane {eye.lane_index}")
        eye = self.adjust_tia_peak_for_down_eye(exp_id, eye, local_slot_id, local_lane_id)

        if eye.issue == EyeIssue.EYE_TOO_DOWN:
            logger.info(f"Eye still too down for RTMR{retimer_index} lane {eye.lane_index}, adjusting high_freq")
            eye = self.adjust_high_freq_for_down_eye(exp_id, eye, remote_slot_id, remote_lane_id)

        if eye.issue == EyeIssue.EYE_TOO_DOWN:
            logger.info(f"Eye still too down for RTMR{retimer_index} lane {eye.lane_index}, adjusting opcurrent")
            eye = self.adjust_opcurrent_for_small_down_eye(exp_id, eye, remote_slot_id, remote_lane_id)

        logger.info(f"Completed down eye processing for RTMR{retimer_index} lane {eye.lane_index}, "
                    f"final issue: {eye.issue.name}")
        return eye

    def process_up_eye(self, exp_id: int, retimer_index: int, eye: EyeData):
        """Fix a too-high eye: raise tia_peak, raise high_freq, lower opcurrent.

        Returns the last measured eye, or ``eye`` unchanged when the lane has
        no slot mapping.
        """
        logger.info(f"Starting up eye issue processing for RTMR{retimer_index} lane {eye.lane_index}")
        slots = self._resolve_slots("up", retimer_index, eye)
        if slots is None:
            return eye
        remote_slot_id, remote_lane_id, local_slot_id, local_lane_id = slots

        logger.info(f"Adjusting tia_peak for RTMR{eye.rt_index} lane {eye.lane_index}")
        eye = self.adjust_tia_peak_for_up_eye(exp_id, eye, local_slot_id, local_lane_id)

        if eye.issue == EyeIssue.EYE_TOO_UP:
            logger.info(f"Eye still too up for RTMR{retimer_index} lane {eye.lane_index}, adjusting high_freq")
            eye = self.adjust_high_freq_for_up_eye(exp_id, eye, remote_slot_id, remote_lane_id)

        if eye.issue == EyeIssue.EYE_TOO_UP:
            logger.info(f"Eye still too up for RTMR{retimer_index} lane {eye.lane_index}, adjusting opcurrent")
            eye = self.adjust_opcurrent_for_up_eye(exp_id, eye, remote_slot_id, remote_lane_id)

        logger.info(f"Completed up eye processing for RTMR{retimer_index} lane {eye.lane_index}, "
                    f"final issue: {eye.issue.name}")
        return eye

    def adjust_ibias_for_small_eye(self, exp_id: int, eye: EyeData,
                                   remote_slot_id: int, remote_lane_id: int,
                                   local_slot_id: int, local_lane_id: int) -> EyeData:
        """Raise the remote ibias step by step while watching local RSSI.

        ibias never exceeds ``ibias_max``.  A step that pushes either RSSI
        lane above ``rssi_max`` is rolled back and ends the sweep.  A failed
        eye measurement ends the sweep and the last valid eye is returned
        instead of an empty record that would read as ``EYE_NORMAL``.
        """
        ibias_max = 3200
        rssi_max = 10000

        ibias_value = self.remote_reg_tool.read_ibias_reg(exp_id, remote_slot_id, remote_lane_id)
        logger.info(f'Initial ibias_value: {ibias_value}')
        if ibias_value == 0:
            # NOTE(review): a second 0 is still used as the starting point,
            # as before -- confirm whether 0 can be a legitimate reading.
            logger.warning('read ibias reg again')
            ibias_value = self.remote_reg_tool.read_ibias_reg(exp_id, remote_slot_id, remote_lane_id)
            logger.info(f'Initial ibias_value: {ibias_value}')

        # Keep the lane identity from the eye we were given; a later failed
        # measurement must not overwrite it with -1.
        rt_phys_index, phys_lane_index = eye.rt_phys_index, eye.phys_lane_index

        while ibias_value < ibias_max:
            previous_value = ibias_value
            # Coarse steps below 2500, finer steps above.
            step = 300 if ibias_value < 2500 else 150
            ibias_value = min(ibias_value + step, ibias_max)
            self.remote_reg_tool.write_ibias_reg(exp_id, remote_slot_id, remote_lane_id, ibias_value)
            logger.info(f'Adjusted ibias to {ibias_value} with step {ibias_value - previous_value}')

            ibias_lane = self.remote_reg_tool.get_ibias_by_logic_lane(remote_lane_id)
            rssi_lanes = self.ibias_rssi_map.get_rssi_lane(self.route_name, ibias_lane)
            if rssi_lanes is None:
                # NOTE(review): the step just written stays in place without
                # an RSSI check (unchanged from the original behaviour).
                logger.error('RSSI lanes is none')
                break

            rssi1_value = self.local_reg_tool.read_rssi_reg(exp_id, local_slot_id, rssi_lanes[0])
            rssi2_value = self.local_reg_tool.read_rssi_reg(exp_id, local_slot_id, rssi_lanes[1])
            logger.info(f'RSSI values: rssi1={rssi1_value}, rssi2={rssi2_value}')

            over_margin = rssi1_value > rssi_max or rssi2_value > rssi_max
            if over_margin:
                self.remote_reg_tool.write_ibias_reg(exp_id, remote_slot_id, remote_lane_id, previous_value)
                logger.info(f'RSSI margin exceeded, rollback ibias to {previous_value}')
                ibias_value = previous_value  # keep the final log truthful

            measured = self.get_eye_diagram_per_lane(exp_id, rt_phys_index, phys_lane_index)
            if not measured.is_valid:
                logger.error('Eye measurement failed after ibias adjustment, stopping ibias sweep')
                break
            eye = measured
            logger.info(f'After ibias adjustment, eye issue: {eye.issue.name}')

            if eye.issue != EyeIssue.EYE_TOO_SMALL or over_margin:
                break

        logger.info(f'Final ibias adjustment result: ibias={ibias_value}, eye issue={eye.issue.name}')
        return eye

    def _sweep_register(self, exp_id: int, eye: EyeData, *, name: str,
                        read_reg, write_reg, slot_id: int, lane_id: int,
                        step: int, stuck_issue: EyeIssue, rollback: bool,
                        limit: Optional[int] = None,
                        limit_offset: Optional[int] = None) -> EyeData:
        """Step one register towards a limit until the eye issue changes.

        Args:
            exp_id: Expander id forwarded to the register and eye calls.
            eye: Current measurement; supplies the lane to re-measure.
            name: Register name used in the log messages.
            read_reg: ``read(exp_id, slot_id, lane_id)`` callable.
            write_reg: ``write(exp_id, slot_id, lane_id, value)`` callable.
            slot_id: Slot passed to ``read_reg``/``write_reg``.
            lane_id: Lane passed to ``read_reg``/``write_reg``.
            step: Signed increment; the sign selects the sweep direction.
            stuck_issue: Issue that means "not improved yet".
            rollback: Restore the initial value when the issue never changed.
            limit: Absolute bound of the sweep (values are clamped to it).
            limit_offset: Alternative bound relative to the initial value.

        Returns:
            The last *valid* measurement.  A failed measurement stops the
            sweep and is never reported as an improvement.
        """
        start_value = read_reg(exp_id, slot_id, lane_id)
        logger.info(f'Initial {name}_value: {start_value}')
        if limit_offset is not None:
            limit = start_value + limit_offset

        ascending = step > 0
        rt_phys_index, phys_lane_index = eye.rt_phys_index, eye.phys_lane_index
        value = start_value
        while (value < limit) if ascending else (value > limit):
            value = min(value + step, limit) if ascending else max(value + step, limit)
            write_reg(exp_id, slot_id, lane_id, value)
            logger.info(f'Adjusted {name} to {value}')

            measured = self.get_eye_diagram_per_lane(exp_id, rt_phys_index, phys_lane_index)
            if not measured.is_valid:
                logger.error(f'Eye measurement failed after {name} adjustment, stopping {name} sweep')
                break
            eye = measured
            logger.info(f'After {name} adjustment, eye issue: {eye.issue.name}')
            if eye.issue != stuck_issue:
                break

        if not rollback:
            logger.info(f'Final {name} adjustment result: {name}={value}, eye issue={eye.issue.name}')
        elif eye.issue == stuck_issue:
            # No improvement: restore the value found at entry.
            write_reg(exp_id, slot_id, lane_id, start_value)
            logger.info(f'No improvement, rollback {name} to {start_value}')
        else:
            logger.info(f'{name} adjustment successful, final value: {value}')
        return eye

    def adjust_opcurrent_for_small_down_eye(self, exp_id: int, eye: EyeData,
                                            remote_slot_id: int, remote_lane_id: int) -> EyeData:
        """Raise remote opcurrent (+10, max 190); roll back if the issue is unchanged."""
        return self._sweep_register(
            exp_id, eye, name='opcurrent',
            read_reg=self.remote_reg_tool.read_opcurrent_reg,
            write_reg=self.remote_reg_tool.write_opcurrent_reg,
            slot_id=remote_slot_id, lane_id=remote_lane_id,
            step=10, limit=190, stuck_issue=eye.issue, rollback=True,
        )

    def adjust_mgc_for_small_eye(self, exp_id: int, eye: EyeData,
                                 local_slot_id: int, local_lane_id: int) -> EyeData:
        """Raise local mgc (+2, at most +10 over the initial value); roll back if still small."""
        return self._sweep_register(
            exp_id, eye, name='mgc',
            read_reg=self.local_reg_tool.read_mgc_reg,
            write_reg=self.local_reg_tool.write_mgc_reg,
            slot_id=local_slot_id, lane_id=local_lane_id,
            step=2, limit_offset=10, stuck_issue=EyeIssue.EYE_TOO_SMALL, rollback=True,
        )

    def adjust_tia_peak_for_down_eye(self, exp_id: int, eye: EyeData,
                                     local_slot_id: int, local_lane_id: int) -> EyeData:
        """Lower local tia_peak (-20, min 0) while the eye stays too low."""
        return self._sweep_register(
            exp_id, eye, name='tia_peak',
            read_reg=self.local_reg_tool.read_tia_peak_reg,
            write_reg=self.local_reg_tool.write_tia_peak_reg,
            slot_id=local_slot_id, lane_id=local_lane_id,
            step=-20, limit=0, stuck_issue=EyeIssue.EYE_TOO_DOWN, rollback=False,
        )

    def adjust_high_freq_for_down_eye(self, exp_id: int, eye: EyeData,
                                      remote_slot_id: int, remote_lane_id: int) -> EyeData:
        """Lower remote high_freq (-20, min 0) while the eye stays too low."""
        return self._sweep_register(
            exp_id, eye, name='high_freq',
            read_reg=self.remote_reg_tool.read_high_freq_reg,
            write_reg=self.remote_reg_tool.write_high_freq_reg,
            slot_id=remote_slot_id, lane_id=remote_lane_id,
            step=-20, limit=0, stuck_issue=EyeIssue.EYE_TOO_DOWN, rollback=False,
        )

    def adjust_tia_peak_for_up_eye(self, exp_id: int, eye: EyeData,
                                   local_slot_id: int, local_lane_id: int) -> EyeData:
        """Raise local tia_peak (+20, max 200) while the eye stays too high."""
        return self._sweep_register(
            exp_id, eye, name='tia_peak',
            read_reg=self.local_reg_tool.read_tia_peak_reg,
            write_reg=self.local_reg_tool.write_tia_peak_reg,
            slot_id=local_slot_id, lane_id=local_lane_id,
            step=20, limit=200, stuck_issue=EyeIssue.EYE_TOO_UP, rollback=False,
        )

    def adjust_high_freq_for_up_eye(self, exp_id: int, eye: EyeData,
                                    remote_slot_id: int, remote_lane_id: int) -> EyeData:
        """Raise remote high_freq (+20, max 150) while the eye stays too high."""
        return self._sweep_register(
            exp_id, eye, name='high_freq',
            read_reg=self.remote_reg_tool.read_high_freq_reg,
            write_reg=self.remote_reg_tool.write_high_freq_reg,
            slot_id=remote_slot_id, lane_id=remote_lane_id,
            step=20, limit=150, stuck_issue=EyeIssue.EYE_TOO_UP, rollback=False,
        )

    def adjust_opcurrent_for_up_eye(self, exp_id: int, eye: EyeData,
                                    remote_slot_id: int, remote_lane_id: int) -> EyeData:
        """Lower remote opcurrent (-10, min 0); roll back if the issue is unchanged."""
        return self._sweep_register(
            exp_id, eye, name='opcurrent',
            read_reg=self.remote_reg_tool.read_opcurrent_reg,
            write_reg=self.remote_reg_tool.write_opcurrent_reg,
            slot_id=remote_slot_id, lane_id=remote_lane_id,
            step=-10, limit=0, stuck_issue=eye.issue, rollback=True,
        )

    def get_issue_eye_diagram(self, exp_id: int, retimer_index: int, attempts: int = 1) -> List[EyeData]:
        """Measure all lanes of a retimer and return the lanes with an issue.

        Each lane is measured ``attempts`` times; the representative sample
        per lane is chosen by ``determine_issue``.
        """
        all_measurements: DefaultDict[int, List[EyeData]] = defaultdict(list)

        for attempt in range(1, attempts + 1):
            logger.info(f"RTMR{retimer_index} measurement attempt {attempt}/{attempts}")
            eye_data_list = self.get_eye_diagram(exp_id, retimer_index)
            if not eye_data_list:
                logger.error(f"Attempt {attempt} failed for RTMR{retimer_index}")
                continue
            for eye_data in eye_data_list:
                all_measurements[eye_data.lane_index].append(eye_data)

        if not all_measurements:
            logger.error(f"No valid measurements for RTMR{retimer_index}")
            return []

        worst_eyes: List[EyeData] = []
        for lane_idx, measurements in all_measurements.items():
            logger.debug(f'---------- lan_inx: {lane_idx}, measurements: {measurements}')
            worst_eyes.append(self.determine_issue(measurements))

        return [eye for eye in worst_eyes if eye.issue != EyeIssue.EYE_NORMAL]

    def determine_issue(self, measurements: List[EyeData]) -> EyeData:
        """Pick the representative sample among repeated measurements.

        The smallest eye wins unless it is normal or too large; in that case
        the largest eye is returned so an oversized sample is not hidden.
        Size issues are assumed to be resolved before up/down issues.
        """
        worst_eye = min(measurements, key=lambda x: x.quality_score)
        if worst_eye.issue in (EyeIssue.EYE_NORMAL, EyeIssue.EYE_TOO_LARGE):
            worst_eye = max(measurements, key=lambda x: x.quality_score)
        return worst_eye

    def _measure_with_retries(self, measure_once, exp_id: int, retimer_phys_index: int,
                              phys_lane: int, attempts: int) -> EyeData:
        """Call ``measure_once`` up to ``attempts`` times for one lane.

        Returns immediately on the first valid sample that shows an issue.
        If every valid sample is normal, the one chosen by ``determine_issue``
        is returned; if no sample is valid, a default ``EyeData()``.
        """
        measurements: List[EyeData] = []

        for attempt in range(1, attempts + 1):
            logger.info(f"Attempt {attempt}/{attempts} for RTMR{retimer_phys_index} Lane{phys_lane}")
            try:
                eye_data = measure_once(exp_id, retimer_phys_index, phys_lane)
            except Exception as e:  # hardware boundary: log and try again
                logger.error(f"Attempt {attempt} failed: {str(e)}")
                continue

            if not eye_data.is_valid:
                logger.warning(f"Invalid measurement in attempt {attempt}")
                continue

            measurements.append(eye_data)
            logger.info(f"Measurement {attempt}: {eye_data}")
            if eye_data.issue != EyeIssue.EYE_NORMAL:
                # An issue was seen: no need to keep sampling.
                logger.info(f"Eye issue detected: {eye_data.issue.name}, returning immediately")
                return eye_data

        if not measurements:
            logger.error(f"No valid measurements obtained after {attempts} attempts")
            return EyeData()

        worst_eye = self.determine_issue(measurements)
        logger.warning(f"Worst eye diagram for RTMR{retimer_phys_index} Lane{phys_lane}: "
                       f"Quality score: {worst_eye.quality_score:.2f}")
        return worst_eye

    def get_worst_eye_diagram_per_lane(self, exp_id: int, retimer_phys_index: int,
                                       phys_lane: int, attempts: int = 3) -> EyeData:
        """Repeat ``get_eye_diagram_per_lane`` (itself retried) ``attempts`` times."""
        return self._measure_with_retries(self.get_eye_diagram_per_lane, exp_id,
                                          retimer_phys_index, phys_lane, attempts)

    def get_eye_diagram_per_lane(self, exp_id: int, retimer_phys_index: int,
                                 phys_lane: int, attempts: int = 3) -> EyeData:
        """Measure one lane up to ``attempts`` times; see ``_measure_with_retries``."""
        return self._measure_with_retries(self.get_eye_diagram_per_lane_1_count, exp_id,
                                          retimer_phys_index, phys_lane, attempts)

    def get_eye_diagram_per_lane_1_count(self, exp_id: int, retimer_phys_index: int,
                                         phys_lane: int) -> EyeData:
        """Run one ``rtmr <n> eye 0 a <lane>`` command and parse its first eye.

        Returns a default (invalid) ``EyeData`` when the command fails,
        returns nothing, or yields no parsable line.
        """
        cmd = f"rtmr {retimer_phys_index} eye 0 a {phys_lane}"
        try:
            raw_output = self.local_bmc.CmdVendorCommand(exp_id, cmd)
            if not raw_output:
                logger.error(f"No data returned from command: {cmd}")
                return EyeData()
            eye_datas = self.parse_eye_data(raw_output)
        except Exception as e:  # hardware boundary: report and return invalid
            logger.error(f"Error executing command '{cmd}': {e}")
            return EyeData()

        if not eye_datas:
            logger.error(f"No eye data parsed from command: {cmd}")
            return EyeData()
        return eye_datas[0]

    def get_eye_diagram(self, exp_id: int, retimer_index: int) -> List[EyeData]:
        """Run ``rtmr <n> eye 0 a`` and return every parsed lane (``[]`` on failure)."""
        cmd = f"rtmr {retimer_index} eye 0 a"
        try:
            raw_output = self.local_bmc.CmdVendorCommand(exp_id, cmd)
            if not raw_output:
                logger.error(f"No data returned from command: {cmd}")
                return []
            return self.parse_eye_data(raw_output)
        except Exception as e:  # hardware boundary: report and return nothing
            logger.error(f"Error executing command '{cmd}': {e}")
            return []

    def parse_eye_data(self, text: str) -> List[EyeData]:
        """Parse vendor-command output into classified ``EyeData`` records.

        Blank lines, lines containing "error" (any case) and lines that do
        not match the eye pattern are skipped.  Physical lanes 0-7 map to
        logical retimer ``<n>1`` lanes 0-7, lanes 8 and up to retimer
        ``<n>2`` lanes ``lane - 8``.
        """
        results = []
        for line_num, line in enumerate(text.strip().splitlines(), 1):
            line = line.strip()
            if not line or "error" in line.lower():
                continue

            # search (not match) tolerates extra characters around the record.
            match = self._EYE_LINE_RE.search(line)
            if not match:
                logger.debug(f"Line {line_num}: Skipped (no match) - {line}")
                continue

            try:
                rtmr_prefix = match.group(1)
                eye_index = int(match.group(2))
                down, top, left, right = (float(match.group(i)) for i in range(3, 7))

                if eye_index < 8:
                    rt_index = int(f"{rtmr_prefix}1")
                    lane_index = eye_index
                else:
                    rt_index = int(f"{rtmr_prefix}2")
                    lane_index = eye_index - 8

                eye_data = EyeData(
                    rt_phys_index=int(rtmr_prefix),
                    phys_lane_index=eye_index,
                    rt_index=rt_index,
                    lane_index=lane_index,
                    down=down,
                    top=top,
                    left=left,
                    right=right,
                )
            except ValueError as e:
                logger.error(f"Failed to parse line {line_num}: {line} | Error: {e}")
                continue

            eye_data.determine_issue()  # classify right away
            results.append(eye_data)

        logger.info(f"Parsed {len(results)} valid eye diagrams.")
        return results


# ocsdiag.py def process_host(host, reg_table_file, exp_list, slot_list, lane_list, cmd, param, reg_wt_value, sl_file): logging.info(f'-----------process: {host}') for exp_id in exp_list: exp_id = int(exp_id) exp_util = SmbusHttpUtil(f"https://{host}", "root", "0penBmc", host) exp_util.lock(exp_id) logging.info(f'----------process_host {host} lock') vuart_util = VuartUtil(host) bmc = DevWhiteRiverExp(exp_util, vuart_util) route_info = bmc.GetOpticalRouteStatus(exp_id, 0) logging.info(f'--------GetOpticalRouteStatus route_info:{route_info}') yaml_route_map = { '1111111111111111': '786-1-oneta', '1212121212121212': '786-1-onetb', '0201040306050807': '786-1-onoc1', '0403020108070605': '786-1-onoc2', '0807060504030201': '786-1-onoc3', '0304010207080506': '786-1-onoc4', '0605080702010403': '786-1-onoc5', '0708050603040102': '786-1-onoc6', '0506070801020304': '786-1-onoc7', } reg_table_file = f'main_data/{yaml_route_map[route_info]}.yaml' route_name = yaml_route_map[route_info] logging.info(f'input reg_table_file is empty, use default config file:{reg_table_file}, route_name:{route_name}') reg_access_tool = OptRegAccessTool(host, reg_table_file, bmc) logging.info(f'init reg_access_tool with reg_table_file:{reg_table_file}, route_name:{route_name}') remote_bmc = None remote_exp_util = exp_util remote_host = host remote_reg_access_tool = reg_access_tool if cmd in ['ibias-auto-tune', 'eq-auto-tune'] and param != '': logging.info(f'------- remote_host: {param}') remote_host = param remote_exp_util = SmbusHttpUtil(f"https://{remote_host}", "root", "0penBmc", remote_host) remote_bmc = DevWhiteRiverExp(remote_exp_util, vuart_util) remote_exp_util.lock(exp_id) logging.info(f'----------process dst host {remote_host} lock')
remote_reg_access_tool = OptRegAccessTool(remote_host, reg_table_file, remote_bmc) if cmd in ['eq-auto-tune']: slot_list = [0,1,2,3,4,5,6,7]