Hi Jan,
On Sat, 11 Nov 2017, Jan Stary wrote:
> This is current/amd64 on a Dell Latitude E5570 (dmesg below).
> When booting without the ethernet cable plugged in,
> the boot sequence finishes with the following message:
>
> em0: Hardware Initialization Failed
> em0: Unable to initialize the hardware
We had similar problems with some HP laptops.
> When I boot with the cable plugged in, everything works as expected,
> like it always has. But now it seems that the ethernet cable _must_
> be plugged in at boot, otherwise em0 will just not work.
>
> Can somebody with em(4) reproduce?
> How can I debug this?
Can you please try if the patch below helps?
If yes, can you please also try without the msec_delay line after the
"Magic delay ..." comment? Note that in our case, without that
delay, it would work most of the time but not always. So you will have
to try it several times (10 ... 20) to be sure that it's reliable.
I have only tested the patch with older openbsd releases, but I expect
it works on current, too.
Cheers,
Stefan
commit aa7c279debd5c66e1d2a0b3c18ceb20ef32ce7b7
Author: Stefan Fritsch <sf@sfritsch.de>
Date: Fri Dec 1 09:56:58 2017 +0100
34236: em: Fixes/workarounds for em on HP laptops
Some em chips have a semaphore ("software flag") to synchronize access
to certain registers between OS and firmware (ME/AMT).
Make the logic to get the flag match the logic in freebsd. This includes
higher timeouts and waiting for a previous unlock to complete before
trying a lock again.
Another problem was that the OpenBSD em driver calls em_get_software_flag
recursively, which causes the semaphore to be unlocked too early. Make
em_get_software_flag/em_release_software_flag handle this correctly.
FreeBSD does not do this, but they have a mutex that probably allows
them to detect recursive calls to e1000_acquire_swflag_ich8lan().
Reworking the openbsd driver to not recursively get the semaphore would
be very invasive.
Also port the logic from freebsd to em_check_phy_reset_block(). A single
read does not seem to be reliable.
Also, increase delay after reset to 20ms, which is the value in freebsd
for ich8lan.
The changes so far make things more reliable, but not 100%. Add another
1ms delay that seems to help with the remaining #34195 problems on HP
elitebook. A printf() at the same place helps, too.
While there, print mac+phy type in em_attach(), and em_init_hw() error
code if something goes wrong.
diff --git a/sys/dev/pci/if_em.c b/sys/dev/pci/if_em.c
index 985a464aaf9..5b6f3479bf5 100644
--- a/sys/dev/pci/if_em.c
+++ b/sys/dev/pci/if_em.c
@@ -545,6 +545,8 @@ em_attach(struct device *parent, struct device *self, void *aux)
if (!defer)
em_update_link_status(sc);
+ printf(", mac_type %#x phy_type %#x ", sc->hw.mac_type,
+ sc->hw.phy_type);
printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
/* Indicate SOL/IDER usage */
@@ -1847,8 +1849,8 @@ em_hardware_init(struct em_softc *sc)
INIT_DEBUGOUT("\nHardware Initialization Deferred ");
return (EAGAIN);
}
- printf("\n%s: Hardware Initialization Failed\n",
- DEVNAME(sc));
+ printf("\n%s: Hardware Initialization Failed: %d\n",
+ DEVNAME(sc), ret_val);
return (EIO);
}
diff --git a/sys/dev/pci/if_em_hw.c b/sys/dev/pci/if_em_hw.c
index bd94aca904b..c2aa43ed342 100644
--- a/sys/dev/pci/if_em_hw.c
+++ b/sys/dev/pci/if_em_hw.c
@@ -929,7 +929,9 @@ em_reset_hw(struct em_hw *hw)
}
em_get_software_flag(hw);
E1000_WRITE_REG(hw, CTRL, (ctrl | E1000_CTRL_RST));
- msec_delay(5);
+ /* HW reset releases software_flag */
+ hw->sw_flag = 0;
+ msec_delay(20);
/* Ungate automatic PHY configuration on non-managed 82579 */
if (hw->mac_type == em_pch2lan && !hw->phy_reset_disable &&
@@ -1473,6 +1475,8 @@ em_init_hw(struct em_hw *hw)
/* Set the media type and TBI compatibility */
em_set_media_type(hw);
+ /* Magic delay that improves problems with i219LM on HP Elitebook */
+ msec_delay(1);
/* Must be called after em_set_media_type because media_type is used */
em_initialize_hardware_bits(hw);
@@ -9504,9 +9508,18 @@ em_check_phy_reset_block(struct em_hw *hw)
DEBUGFUNC("em_check_phy_reset_block\n");
if (IS_ICH8(hw->mac_type)) {
- fwsm = E1000_READ_REG(hw, FWSM);
- return (fwsm & E1000_FWSM_RSPCIPHY) ? E1000_SUCCESS :
- E1000_BLK_PHY_RESET;
+ int i = 0;
+ int blocked = 0;
+ do {
+ fwsm = E1000_READ_REG(hw, FWSM);
+ if (!(fwsm & E1000_FWSM_RSPCIPHY)) {
+ blocked = 1;
+ msec_delay(10);
+ continue;
+ }
+ blocked = 0;
+ } while (blocked && (i++ < 30));
+ return blocked ? E1000_BLK_PHY_RESET : E1000_SUCCESS;
}
if (hw->mac_type > em_82547_rev_2)
manc = E1000_READ_REG(hw, MANC);
@@ -9567,11 +9580,27 @@ em_get_software_flag(struct em_hw *hw)
DEBUGFUNC("em_get_software_flag");
if (IS_ICH8(hw->mac_type)) {
+ if (hw->sw_flag) {
+ hw->sw_flag++;
+ return E1000_SUCCESS;
+ }
while (timeout) {
extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL);
- extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG;
- E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl);
+ if (!(extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG))
+ break;
+ msec_delay_irq(1);
+ timeout--;
+ }
+ if (!timeout) {
+ printf("%s: SW has already locked the resource?\n",
+ __func__);
+ return -E1000_ERR_CONFIG;
+ }
+ timeout = SW_FLAG_TIMEOUT;
+ extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG;
+ E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl);
+ while (timeout) {
extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL);
if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG)
break;
@@ -9580,10 +9609,15 @@ em_get_software_flag(struct em_hw *hw)
}
if (!timeout) {
- DEBUGOUT("FW or HW locks the resource too long.\n");
+ printf("Failed to acquire the semaphore, FW or HW "
+ "has it: FWSM=0x%8.8x EXTCNF_CTRL=0x%8.8x)\n",
+ E1000_READ_REG(hw, FWSM), extcnf_ctrl);
+ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG;
+ E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl);
return -E1000_ERR_CONFIG;
}
}
+ hw->sw_flag++;
return E1000_SUCCESS;
}
@@ -9603,6 +9637,13 @@ em_release_software_flag(struct em_hw *hw)
DEBUGFUNC("em_release_software_flag");
if (IS_ICH8(hw->mac_type)) {
+ if (hw->sw_flag <= 0) {
+ printf("%s: not locked!\n", __func__);
+ return;
+ }
+ hw->sw_flag--;
+ if (hw->sw_flag > 0)
+ return;
extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL);
extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG;
E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl);
diff --git a/sys/dev/pci/if_em_hw.h b/sys/dev/pci/if_em_hw.h
index a897269abde..c1f601b4748 100644
--- a/sys/dev/pci/if_em_hw.h
+++ b/sys/dev/pci/if_em_hw.h
@@ -1622,6 +1622,7 @@ struct em_hw {
uint8_t bus_func;
uint16_t swfw;
boolean_t eee_enable;
+ int sw_flag;
};
#define E1000_EEPROM_SWDPIN0 0x0001 /* SWDPIN 0 EEPROM Value */
@@ -2743,6 +2744,8 @@ struct em_host_command_info {
#define AUTO_READ_DONE_TIMEOUT 10
/* Number of milliseconds we wait for PHY configuration done after MAC reset */
#define PHY_CFG_TIMEOUT 100
+/* SW Semaphore flag timeout in ms */
+#define SW_FLAG_TIMEOUT 1000
#define E1000_TX_BUFFER_SIZE ((uint32_t)1514)
No comments:
Post a Comment