From dc428860cf1ef8df4df5b53b97ea61f551f50b11 Mon Sep 17 00:00:00 2001 From: Aikar Date: Sun, 12 Aug 2018 13:27:23 -0400 Subject: [PATCH] Improve Watchdog Early Warning Feature - Closes #1319 1) Don't kick in until server has started (the full crash will still kick in before full start) 2) Delay reporting until 10 seconds, then print every 5 3) Make the intervals configurable 4) Make it able to be disabled by setting every interval to <= 0 --- ...Add-5-second-short-dumps-to-watchdog.patch | 87 --------- ...dd-Early-Warning-Feature-to-WatchDog.patch | 166 ++++++++++++++++++ 2 files changed, 166 insertions(+), 87 deletions(-) delete mode 100644 Spigot-Server-Patches/0349-Add-5-second-short-dumps-to-watchdog.patch create mode 100644 Spigot-Server-Patches/0349-Add-Early-Warning-Feature-to-WatchDog.patch diff --git a/Spigot-Server-Patches/0349-Add-5-second-short-dumps-to-watchdog.patch b/Spigot-Server-Patches/0349-Add-5-second-short-dumps-to-watchdog.patch deleted file mode 100644 index 63dfe16f0e..0000000000 --- a/Spigot-Server-Patches/0349-Add-5-second-short-dumps-to-watchdog.patch +++ /dev/null @@ -1,87 +0,0 @@ -From a30613e82fe3e0e0759e03c7d6b4b69d60e005f0 Mon Sep 17 00:00:00 2001 -From: miclebrick -Date: Wed, 8 Aug 2018 15:30:52 -0400 -Subject: [PATCH] Add 5 second short dumps to watchdog - - -diff --git a/src/main/java/org/spigotmc/WatchdogThread.java b/src/main/java/org/spigotmc/WatchdogThread.java -index 57a4748a3..815c3e664 100644 ---- a/src/main/java/org/spigotmc/WatchdogThread.java -+++ b/src/main/java/org/spigotmc/WatchdogThread.java -@@ -13,8 +13,10 @@ public class WatchdogThread extends Thread - - private static WatchdogThread instance; - private final long timeoutTime; -+ private final long shortTimeout; // Paper - Timeout time for just printing a dump but not restarting - private final boolean restart; - private volatile long lastTick; -+ private long lastShortDump; // Paper - Keep track of short dump times to avoid spamming console with short dumps - private volatile boolean stopping; - - private WatchdogThread(long timeoutTime, boolean restart) -@@ -22,6 +24,7 @@ public class WatchdogThread extends Thread - super( "Paper Watchdog Thread" ); - this.timeoutTime = timeoutTime; - this.restart = restart; -+ shortTimeout = Math.min(5000, timeoutTime); // Paper - Make short timeout the lower of 5 seconds, and timeout time - } - - public static void doStart(int timeoutTime, boolean restart) -@@ -52,9 +55,19 @@ public class WatchdogThread extends Thread - while ( !stopping ) - { - // -- if ( lastTick != 0 && System.currentTimeMillis() > lastTick + timeoutTime && !Boolean.getBoolean("disable.watchdog")) // Paper - Add property to disable -+ long currentTime = System.currentTimeMillis(); // Paper - do we REALLY need to call this method multiple times? -+ if ( lastTick != 0 && currentTime > lastTick + shortTimeout && !Boolean.getBoolean("disable.watchdog") ) // Paper - Add property to disable and short timeout - { -+ // Paper start -+ boolean isLongTimeout = currentTime > lastTick + timeoutTime; -+ // Don't spam short dumps -+ if ( !isLongTimeout && currentTime < lastShortDump + shortTimeout ) continue; -+ lastShortDump = currentTime; -+ // Paper end - Logger log = Bukkit.getServer().getLogger(); -+ // Paper start - Different message when it's a short timeout -+ if ( isLongTimeout ) -+ { - log.log( Level.SEVERE, "The server has stopped responding!" ); - log.log( Level.SEVERE, "Please report this to https://github.com/PaperMC/Paper/issues" ); - log.log( Level.SEVERE, "Be sure to include ALL relevant console errors and Minecraft crash reports" ); -@@ -79,11 +92,19 @@ public class WatchdogThread extends Thread - } - } - // Paper end -+ } else -+ { -+ log.log( Level.SEVERE, "The server has not responded for " + shortTimeout / 1000 + " seconds! Creating thread dump"); -+ } -+ // Paper end - Different message for short timeout - log.log( Level.SEVERE, "------------------------------" ); - log.log( Level.SEVERE, "Server thread dump (Look for plugins here before reporting to Paper!):" ); - dumpThread( ManagementFactory.getThreadMXBean().getThreadInfo( MinecraftServer.getServer().primaryThread.getId(), Integer.MAX_VALUE ), log ); - log.log( Level.SEVERE, "------------------------------" ); - // -+ // Paper start - Only print full dump on long timeouts -+ if ( isLongTimeout ) -+ { - log.log( Level.SEVERE, "Entire Thread Dump:" ); - ThreadInfo[] threads = ManagementFactory.getThreadMXBean().dumpAllThreads( true, true ); - for ( ThreadInfo thread : threads ) -@@ -97,11 +118,12 @@ public class WatchdogThread extends Thread - RestartCommand.restart(); - } - break; -+ } // Paper end - } - - try - { -- sleep( 10000 ); -+ sleep( 1000 ); // Paper - Reduce check time to every second instead of every ten seconds, more consistent and allows for short timeout - } catch ( InterruptedException ex ) - { - interrupt(); --- -2.18.0 - diff --git a/Spigot-Server-Patches/0349-Add-Early-Warning-Feature-to-WatchDog.patch b/Spigot-Server-Patches/0349-Add-Early-Warning-Feature-to-WatchDog.patch new file mode 100644 index 0000000000..ab9f66b462 --- /dev/null +++ b/Spigot-Server-Patches/0349-Add-Early-Warning-Feature-to-WatchDog.patch @@ -0,0 +1,166 @@ +From 572418116aca425b6283f25eb1aaefafac1381fd Mon Sep 17 00:00:00 2001 +From: miclebrick +Date: Wed, 8 Aug 2018 15:30:52 -0400 +Subject: [PATCH] Add Early Warning Feature to WatchDog + +Detect when the server has been hung for a long duration, and start printing +thread dumps at an interval until the point of crash. + +This will help diagnose what was going on in that time before the crash. + +diff --git a/src/main/java/com/destroystokyo/paper/PaperConfig.java b/src/main/java/com/destroystokyo/paper/PaperConfig.java +index a5ff014e33..332e90f86b 100644 +--- a/src/main/java/com/destroystokyo/paper/PaperConfig.java ++++ b/src/main/java/com/destroystokyo/paper/PaperConfig.java +@@ -23,6 +23,8 @@ import org.bukkit.configuration.InvalidConfigurationException; + import org.bukkit.configuration.file.YamlConfiguration; + import co.aikar.timings.Timings; + import co.aikar.timings.TimingsManager; ++import org.spigotmc.SpigotConfig; ++import org.spigotmc.WatchdogThread; + + public class PaperConfig { + +@@ -304,6 +306,14 @@ public class PaperConfig { + } + } + ++ public static int watchdogPrintEarlyWarningEvery = 5000; ++ public static int watchdogPrintEarlyWarningDelay = 10000; ++ private static void watchdogEarlyWarning() { ++ watchdogPrintEarlyWarningEvery = getInt("settings.watchdog.early-warning-every", 5000); ++ watchdogPrintEarlyWarningDelay = getInt("settings.watchdog.early-warning-delay", 10000); ++ WatchdogThread.doStart(SpigotConfig.timeoutTime, SpigotConfig.restartOnCrash ); ++ } ++ + public static int tabSpamIncrement = 10; + public static int tabSpamLimit = 500; + private static void tabSpamLimiters() { +diff --git a/src/main/java/net/minecraft/server/MinecraftServer.java b/src/main/java/net/minecraft/server/MinecraftServer.java +index 0399a48e19..e0546e3dd2 100644 +--- a/src/main/java/net/minecraft/server/MinecraftServer.java ++++ b/src/main/java/net/minecraft/server/MinecraftServer.java +@@ -622,6 +622,7 @@ public abstract class MinecraftServer implements ICommandListener, Runnable, IAs + this.a(this.q); + + // Spigot start ++ org.spigotmc.WatchdogThread.hasStarted = true; + Arrays.fill( recentTps, 20 ); + long start = System.nanoTime(), lastTick = start - TICK_TIME, catchupTime = 0, curTime, wait, tickSection = start; // Paper - Further improve server tick loop + while (this.isRunning) { +diff --git a/src/main/java/org/spigotmc/SpigotConfig.java b/src/main/java/org/spigotmc/SpigotConfig.java +index 4f9fd4bc60..2cdd9aaf81 100644 +--- a/src/main/java/org/spigotmc/SpigotConfig.java ++++ b/src/main/java/org/spigotmc/SpigotConfig.java +@@ -223,7 +223,7 @@ public class SpigotConfig + restartScript = getString( "settings.restart-script", restartScript ); + restartMessage = transform( getString( "messages.restart", "Server is restarting" ) ); + commands.put( "restart", new RestartCommand( "restart" ) ); +- WatchdogThread.doStart( timeoutTime, restartOnCrash ); ++ //WatchdogThread.doStart( timeoutTime, restartOnCrash ); // Paper - moved to PaperConfig + } + + public static boolean bungee; +diff --git a/src/main/java/org/spigotmc/WatchdogThread.java b/src/main/java/org/spigotmc/WatchdogThread.java +index 57a4748a30..28a9e2b96f 100644 +--- a/src/main/java/org/spigotmc/WatchdogThread.java ++++ b/src/main/java/org/spigotmc/WatchdogThread.java +@@ -5,6 +5,7 @@ import java.lang.management.MonitorInfo; + import java.lang.management.ThreadInfo; + import java.util.logging.Level; + import java.util.logging.Logger; ++import com.destroystokyo.paper.PaperConfig; + import net.minecraft.server.MinecraftServer; + import org.bukkit.Bukkit; + +@@ -13,6 +14,10 @@ public class WatchdogThread extends Thread + + private static WatchdogThread instance; + private final long timeoutTime; ++ private final long earlyWarningEvery; // Paper - Timeout time for just printing a dump but not restarting ++ private final long earlyWarningDelay; // Paper ++ public static volatile boolean hasStarted; // Paper ++ private long lastEarlyWarning; // Paper - Keep track of short dump times to avoid spamming console with short dumps + private final boolean restart; + private volatile long lastTick; + private volatile boolean stopping; +@@ -22,6 +27,8 @@ public class WatchdogThread extends Thread + super( "Paper Watchdog Thread" ); + this.timeoutTime = timeoutTime; + this.restart = restart; ++ earlyWarningEvery = Math.min(PaperConfig.watchdogPrintEarlyWarningEvery, timeoutTime); // Paper ++ earlyWarningDelay = Math.min(PaperConfig.watchdogPrintEarlyWarningDelay, timeoutTime); // Paper + } + + public static void doStart(int timeoutTime, boolean restart) +@@ -52,9 +59,19 @@ public class WatchdogThread extends Thread + while ( !stopping ) + { + // +- if ( lastTick != 0 && System.currentTimeMillis() > lastTick + timeoutTime && !Boolean.getBoolean("disable.watchdog")) // Paper - Add property to disable ++ long currentTime = System.currentTimeMillis(); // Paper - do we REALLY need to call this method multiple times? ++ if ( lastTick != 0 && currentTime > lastTick + earlyWarningEvery && !Boolean.getBoolean("disable.watchdog") ) // Paper - Add property to disable and short timeout + { ++ // Paper start ++ boolean isLongTimeout = currentTime > lastTick + timeoutTime; ++ // Don't spam early warning dumps ++ if ( !isLongTimeout && (earlyWarningEvery <= 0 || !hasStarted || currentTime < lastEarlyWarning + earlyWarningEvery || currentTime < lastTick + earlyWarningDelay)) continue; ++ lastEarlyWarning = currentTime; ++ // Paper end + Logger log = Bukkit.getServer().getLogger(); ++ // Paper start - Different message when it's a short timeout ++ if ( isLongTimeout ) ++ { + log.log( Level.SEVERE, "The server has stopped responding!" ); + log.log( Level.SEVERE, "Please report this to https://github.com/PaperMC/Paper/issues" ); + log.log( Level.SEVERE, "Be sure to include ALL relevant console errors and Minecraft crash reports" ); +@@ -79,29 +96,45 @@ public class WatchdogThread extends Thread + } + } + // Paper end ++ } else ++ { ++ log.log(Level.SEVERE, "--- DO NOT REPORT THIS TO PAPER - THIS IS NOT A BUG OR A CRASH ---"); ++ log.log(Level.SEVERE, "The server has not responded for " + (currentTime - lastTick) / 1000 + " seconds! Creating thread dump"); ++ } ++ // Paper end - Different message for short timeout + log.log( Level.SEVERE, "------------------------------" ); + log.log( Level.SEVERE, "Server thread dump (Look for plugins here before reporting to Paper!):" ); + dumpThread( ManagementFactory.getThreadMXBean().getThreadInfo( MinecraftServer.getServer().primaryThread.getId(), Integer.MAX_VALUE ), log ); + log.log( Level.SEVERE, "------------------------------" ); + // ++ // Paper start - Only print full dump on long timeouts ++ if ( isLongTimeout ) ++ { + log.log( Level.SEVERE, "Entire Thread Dump:" ); + ThreadInfo[] threads = ManagementFactory.getThreadMXBean().dumpAllThreads( true, true ); + for ( ThreadInfo thread : threads ) + { + dumpThread( thread, log ); + } ++ } else { ++ log.log(Level.SEVERE, "--- DO NOT REPORT THIS TO PAPER - THIS IS NOT A BUG OR A CRASH ---"); ++ } ++ ++ + log.log( Level.SEVERE, "------------------------------" ); + + if ( restart ) + { + RestartCommand.restart(); + } ++ if (isLongTimeout) { + break; ++ } // Paper end + } + + try + { +- sleep( 10000 ); ++ sleep( 1000 ); // Paper - Reduce check time to every second instead of every ten seconds, more consistent and allows for short timeout + } catch ( InterruptedException ex ) + { + interrupt(); +-- +2.18.0 +