[{"data":1,"prerenderedAt":48},["ShallowReactive",2],{"i-lucide:menu":3,"i-lucide:grip":8,"i-lucide:chevron-right":10,"i-lucide:moon":12,"i-lucide:sun":14,"i-material-symbols:language":16,"i-lucide:chevron-down":18,"i-lucide:shield-check":20,"i-lucide:mail":22,"i-tabler:markdown":24,"i-lucide:file-text":26,"i-lucide:box":28,"i-lucide:code-2":30,"i-lucide:image":32,"i-lucide:square-sigma":34,"i-lucide:gamepad-2":36,"i-lucide:sparkles":38,"blog-body-how-text-diff-algorithms-work-zh":40,"i-lucide:cpu":41,"i-lucide:code":43,"i-lucide:diff":45,"i-lucide:code-xml":47,"i-lucide:film":47,"i-lucide:graduation-cap":47},{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":7},0,24,false,"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M4 5h16M4 12h16M4 19h16\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":9},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Ccircle cx=\"12\" cy=\"5\" r=\"1\"\u002F>\u003Ccircle cx=\"19\" cy=\"5\" r=\"1\"\u002F>\u003Ccircle cx=\"5\" cy=\"5\" r=\"1\"\u002F>\u003Ccircle cx=\"12\" cy=\"12\" r=\"1\"\u002F>\u003Ccircle cx=\"19\" cy=\"12\" r=\"1\"\u002F>\u003Ccircle cx=\"5\" cy=\"12\" r=\"1\"\u002F>\u003Ccircle cx=\"12\" cy=\"19\" r=\"1\"\u002F>\u003Ccircle cx=\"19\" cy=\"19\" r=\"1\"\u002F>\u003Ccircle cx=\"5\" cy=\"19\" r=\"1\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":11},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"m9 18l6-6l-6-6\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":13},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M20.985 12.486a9 9 0 1 1-9.473-9.472c.405-.022.617.46.402.803a6 6 0 0 0 8.268 8.268c.344-.215.825-.004.803.401\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":15},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Ccircle cx=\"12\" cy=\"12\" r=\"4\"\u002F>\u003Cpath d=\"M12 2v2m0 16v2M4.93 4.93l1.41 1.41m11.32 11.32l1.41 1.41M2 12h2m16 0h2M6.34 17.66l-1.41 1.41M19.07 4.93l-1.41 1.41\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":17},"\u003Cpath fill=\"currentColor\" d=\"M8.125 21.213q-1.825-.788-3.187-2.15t-2.15-3.188T2 11.988t.788-3.875t2.15-3.175t3.187-2.15T12.013 2t3.875.788t3.175 2.15t2.15 3.175t.787 3.875t-.787 3.887t-2.15 3.188t-3.175 2.15t-3.875.787t-3.888-.787M12 19.95q.65-.9 1.125-1.875T13.9 16h-3.8q.3 1.1.775 2.075T12 19.95m-2.6-.4q-.45-.825-.787-1.713T8.05 16H5.1q.725 1.25 1.813 2.175T9.4 19.55m5.2 0q1.4-.45 2.488-1.375T18.9 16h-2.95q-.225.95-.562 1.838T14.6 19.55M4.25 14h3.4q-.075-.5-.112-.987T7.5 12t.038-1.012T7.65 10h-3.4q-.125.5-.187.988T4 12t.063 1.013t.187.987m5.4 0h4.7q.075-.5.113-.987T14.5 12t-.038-1.012T14.35 10h-4.7q-.075.5-.112.988T9.5 12t.038 1.013t.112.987m6.7 0h3.4q.125-.5.188-.987T20 12t-.062-1.012T19.75 10h-3.4q.075.5.113.988T16.5 12t-.038 1.013t-.112.987m-.4-6h2.95q-.725-1.25-1.812-2.175T14.6 4.45q.45.825.788 1.713T15.95 8M10.1 8h3.8q-.3-1.1-.775-2.075T12 4.05q-.65.9-1.125 1.875T10.1 8m-5 0h2.95q.225-.95.563-1.838T9.4 4.45Q8 4.9 6.912 5.825T5.1 8\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":19},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"m6 9l6 6l6-6\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":21},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M20 13c0 5-3.5 7.5-7.66 8.95a1 1 0 0 1-.67-.01C7.5 20.5 4 18 4 13V6a1 1 0 0 1 1-1c2 0 4.5-1.2 6.24-2.72a1.17 1.17 0 0 1 1.52 0C14.51 3.81 17 5 19 5a1 1 0 0 1 1 1z\"\u002F>\u003Cpath d=\"m9 12l2 2l4-4\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":23},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"m22 7l-8.991 5.727a2 2 0 0 1-2.009 0L2 7\"\u002F>\u003Crect width=\"20\" height=\"16\" x=\"2\" y=\"4\" rx=\"2\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":25},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M3 7a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2v10a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z\"\u002F>\u003Cpath d=\"M7 15V9l2 2l2-2v6m3-2l2 2l2-2m-2 2V9\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":27},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M6 22a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h8a2.4 2.4 0 0 1 1.704.706l3.588 3.588A2.4 2.4 0 0 1 20 8v12a2 2 0 0 1-2 2z\"\u002F>\u003Cpath d=\"M14 2v5a1 1 0 0 0 1 1h5M10 9H8m8 4H8m8 4H8\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":29},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M21 8a2 2 0 0 0-1-1.73l-7-4a2 2 0 0 0-2 0l-7 4A2 2 0 0 0 3 8v8a2 2 0 0 0 1 1.73l7 4a2 2 0 0 0 2 0l7-4A2 2 0 0 0 21 16Z\"\u002F>\u003Cpath d=\"m3.3 7l8.7 5l8.7-5M12 22V12\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":31},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"m18 16l4-4l-4-4M6 8l-4 4l4 4m8.5-12l-5 16\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":33},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Crect width=\"18\" height=\"18\" x=\"3\" y=\"3\" rx=\"2\" ry=\"2\"\u002F>\u003Ccircle cx=\"9\" cy=\"9\" r=\"2\"\u002F>\u003Cpath d=\"m21 15l-3.086-3.086a2 2 0 0 0-2.828 0L6 21\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":35},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Crect width=\"18\" height=\"18\" x=\"3\" y=\"3\" rx=\"2\"\u002F>\u003Cpath d=\"M16 8.9V7H8l4 5l-4 5h8v-1.9\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":37},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M6 11h4M8 9v4m7-1h.01M18 10h.01m-.69-5H6.68a4 4 0 0 0-3.978 3.59l-.017.152C2.604 9.416 2 14.456 2 16a3 3 0 0 0 3 3c1 0 1.5-.5 2-1l1.414-1.414A2 2 0 0 1 9.828 16h4.344a2 2 0 0 1 1.414.586L17 18c.5.5 1 1 2 1a3 3 0 0 0 3-3c0-1.545-.604-6.584-.685-7.258q-.01-.075-.017-.151A4 4 0 0 0 17.32 5\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":39},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M11.017 2.814a1 1 0 0 1 1.966 0l1.051 5.558a2 2 0 0 0 1.594 1.594l5.558 1.051a1 1 0 0 1 0 1.966l-5.558 1.051a2 2 0 0 0-1.594 1.594l-1.051 5.558a1 1 0 0 1-1.966 0l-1.051-5.558a2 2 0 0 0-1.594-1.594l-5.558-1.051a1 1 0 0 1 0-1.966l5.558-1.051a2 2 0 0 0 1.594-1.594zM20 2v4m2-2h-4\"\u002F>\u003Ccircle cx=\"4\" cy=\"20\" r=\"2\"\u002F>\u003C\u002Fg>","\u003Cblockquote>\n\u003Cp>文本对比看起来只是&quot;把不一样的地方标红标绿&quot;，但真正难的是：\u003Cstrong>插入一行后，后面所有行都往下挪了，怎么知道它们没变、只是位置偏了？\u003C\u002Fstrong> diff 算法的核心不是逐字符比，而是求一条&quot;从 A 改到 B 代价最小的编辑路径&quot;。\u003C\u002Fp>\n\u003C\u002Fblockquote>\n\u003Cp>\u003Cimg src=\"\u002Fblog\u002Fhow-text-diff-algorithms-work\u002Fcover.webp\" alt=\"在编辑图网格上寻找两段文本之间的最短编辑路径\">\u003C\u002Fp>\n\u003Ch2>diff 到底在求解什么问题？\u003C\u002Fh2>\n\u003Cp>diff 的本质是求\u003Cstrong>从文本 A 变成文本 B 的最短编辑序列\u003C\u002Fstrong>：用最少的&quot;插入&quot;和&quot;删除&quot;操作把 A 改成 B。为什么不逐字符比？因为逐位比对遇到&quot;在第 3 行插入一行&quot;会彻底错乱——从插入点往后每一行都和原来对不齐，朴素比对会把它们全判成&quot;改了&quot;，而人眼一看就知道只是整体下移、内容没变。\u003C\u002Fp>\n\u003Cp>把它建模成&quot;最少编辑操作&quot;后，算法就能识别出&quot;这些行其实没变，只是被插入的新行顶下去了&quot;。这与\u003Cstrong>最长公共子序列（LCS）\u003C\u002Fstrong> 是同一枚硬币的两面：A 和 B 的 LCS 越长，需要增删的部分就越少，两者一一对应。\u003C\u002Fp>\n\u003Ch2>最长公共子序列（LCS）是怎么定位&quot;没变的部分&quot;的？\u003C\u002Fh2>\n\u003Cp>LCS 指\u003Cstrong>两段文本中按原顺序都出现、但不要求连续的最长子序列\u003C\u002Fstrong>。它代表 A 和 B 共有、未被改动的内容；剩下的部分，A 里独有的就是&quot;删除&quot;，B 里独有的就是&quot;新增&quot;。经典解法是动态规划：\u003C\u002Fp>\n\u003Cul>\n\u003Cli>用一个二维表 \u003Ccode>dp[i][j]\u003C\u002Fcode> 表示 A 前 i 个单位与 B 前 j 个单位的 LCS 长度；\u003C\u002Fli>\n\u003Cli>当前两个单位相同则 \u003Ccode>dp[i][j] = dp[i-1][j-1] + 1\u003C\u002Fcode>，否则取上方与左方的较大值；\u003C\u002Fli>\n\u003Cli>回溯这张表即可还原出哪些是公共的、哪些是增删。\u003C\u002Fli>\n\u003C\u002Ful>\n\u003Cp>它直观、好实现，但时间和空间都是 O(N×M)，N、M 是两段文本长度。文本一大，二维表就吃内存，这是它的硬伤。\u003C\u002Fp>\n\u003Ch2>Myers 算法为什么更快、更常用？\u003C\u002Fh2>\n\u003Cp>Myers 算法把 diff 看成&quot;在编辑图里找最短路径&quot;，在差异较小时远快于朴素 DP，这也是 Git 默认用它的原因。它的关键洞察是：实际场景里两个版本往往\u003Cstrong>只差几处\u003C\u002Fstrong>，差异数 D 很小。Myers 的复杂度约 \u003Cstrong>O(ND)\u003C\u002Fstrong>（N 为总长度，D 为编辑距离），当 D 远小于 N 时几乎线性。\u003C\u002Fp>\n\u003Ctable>\n\u003Cthead>\n\u003Ctr>\n\u003Cth>维度\u003C\u002Fth>\n\u003Cth>LCS 动态规划\u003C\u002Fth>\n\u003Cth>Myers (O(ND))\u003C\u002Fth>\n\u003C\u002Ftr>\n\u003C\u002Fthead>\n\u003Ctbody>\n\u003Ctr>\n\u003Ctd>思路\u003C\u002Ftd>\n\u003Ctd>填二维表求最长公共子序列\u003C\u002Ftd>\n\u003Ctd>编辑图上找最短编辑路径\u003C\u002Ftd>\n\u003C\u002Ftr>\n\u003Ctr>\n\u003Ctd>时间复杂度\u003C\u002Ftd>\n\u003Ctd>O(N×M)\u003C\u002Ftd>\n\u003Ctd>约 O(ND)，D 为差异数\u003C\u002Ftd>\n\u003C\u002Ftr>\n\u003Ctr>\n\u003Ctd>空间\u003C\u002Ftd>\n\u003Ctd>O(N×M)（可优化到 O(N)）\u003C\u002Ftd>\n\u003Ctd>线性空间变体 O(N)\u003C\u002Ftd>\n\u003C\u002Ftr>\n\u003Ctr>\n\u003Ctd>差异很小时\u003C\u002Ftd>\n\u003Ctd>仍要填满整张表\u003C\u002Ftd>\n\u003Ctd>极快（接近线性）\u003C\u002Ftd>\n\u003C\u002Ftr>\n\u003Ctr>\n\u003Ctd>差异很大时\u003C\u002Ftd>\n\u003Ctd>稳定但慢\u003C\u002Ftd>\n\u003Ctd>退化到接近 O(N²)\u003C\u002Ftd>\n\u003C\u002Ftr>\n\u003Ctr>\n\u003Ctd>典型采用\u003C\u002Ftd>\n\u003Ctd>教学、短文本\u003C\u002Ftd>\n\u003Ctd>Git、多数 diff 工具\u003C\u002Ftd>\n\u003C\u002Ftr>\n\u003C\u002Ftbody>\n\u003C\u002Ftable>\n\u003Cp>结论很实际：\u003Cstrong>差异小用 Myers 占尽便宜，差异极大时两者都难免变慢\u003C\u002Fstrong>。\u003C\u002Fp>\n\u003Ch2>行级 diff 和字符级 diff 该怎么选？\u003C\u002Fh2>\n\u003Cp>diff 的&quot;比对单位&quot;可大可小，单位选错会让结果难读。常见三档：\u003C\u002Fp>\n\u003Cul>\n\u003Cli>\u003Cstrong>行级\u003C\u002Fstrong>：以整行为单位比对，最常用。代码、配置、日志天然按行组织，结果干净；但只能告诉你&quot;这一行变了&quot;，不告诉你行里改了哪个字符。\u003C\u002Fli>\n\u003Cli>\u003Cstrong>词级\u003C\u002Fstrong>：以单词\u002F分词为单位，适合自然语言文本，能标出改了哪个词。\u003C\u002Fli>\n\u003Cli>\u003Cstrong>字符级\u003C\u002Fstrong>：以单个字符为单位，最细，但对长文本会产生大量琐碎的增删片段，反而看不清。\u003C\u002Fli>\n\u003C\u002Ful>\n\u003Cp>工程上的常见做法是\u003Cstrong>分层\u003C\u002Fstrong>：先按行 diff 抓住宏观结构，再对&quot;看起来是修改&quot;的行对做更细粒度的二次 diff。\u003C\u002Fp>\n\u003Ch2>&quot;行内高亮&quot;是怎么实现的？\u003C\u002Fh2>\n\u003Cp>行内高亮（标出某一行里具体改了哪几个字符）通常是\u003Cstrong>两级 diff 的结果\u003C\u002Fstrong>。第一级做行级 diff，会得到一串&quot;删除某行 + 新增某行&quot;的操作；当一个删除行和一个新增行在位置上配对、且内容相似度足够高时，就把它们判定为&quot;这一行被修改了&quot;。第二级再对这一对行单独跑一次字符级或词级 diff，把行内真正变化的片段精确标出。\u003C\u002Fp>\n\u003Cp>所以行内高亮并不是另一套魔法，而是\u003Cstrong>在更细的粒度上重复同样的算法\u003C\u002Fstrong>——这也解释了为什么它比纯行级 diff 更耗算力：每一对修改行都要再算一次。\u003C\u002Fp>\n\u003Ch2>能力边界与已知限制\u003C\u002Fh2>\n\u003Cp>diff 算法不是万能的，几个常见边界值得知道：\u003C\u002Fp>\n\u003Cul>\n\u003Cli>\u003Cstrong>差异极大时退化\u003C\u002Fstrong>：当两段文本几乎完全不同（D 接近 N），Myers 会退化到接近 O(N²)，超大文本可能明显卡顿；\u003C\u002Fli>\n\u003Cli>\u003Cstrong>&quot;语义等价&quot;识别不了\u003C\u002Fstrong>：算法只看字符序列，识别不了&quot;变量重命名后逻辑没变&quot;或&quot;代码块整体搬家&quot;这类语义层面的等价，搬家会被记成一删一增；\u003C\u002Fli>\n\u003Cli>\u003Cstrong>行尾与空白干扰\u003C\u002Fstrong>：CRLF\u002FLF 换行差异、行尾空格、缩进 tab\u002F空格混用，都会被算成差异，对比前常需归一化；\u003C\u002Fli>\n\u003Cli>\u003Cstrong>比对单位影响可读性\u003C\u002Fstrong>：字符级 diff 在长段落上会碎成大量小片段，可读性反而不如词级或行级。\u003C\u002Fli>\n\u003C\u002Ful>\n\u003Cp>判断一个 diff 结果好不好用，关键看\u003Cstrong>比对单位是否匹配文本类型、以及是否对换行\u002F空白做了合理归一化\u003C\u002Fstrong>，而不只是算法本身快不快。\u003C\u002Fp>\n\u003Ch2>小结\u003C\u002Fh2>\n\u003Cp>文本对比的核心不是&quot;逐字比对&quot;，而是\u003Cstrong>求最短编辑路径\u003C\u002Fstrong>：LCS 找出未变的公共部分，Myers 用编辑图最短路径在差异小时做到接近线性。行级、词级、字符级是不同的比对粒度，行内高亮则是&quot;先行级、再对修改行做字符级&quot;的两级 diff。理解了&quot;最短编辑序列 + 比对粒度 + 归一化&quot;这三件事，就能判断一类 diff 实现适合什么样的文本与规模。\u003C\u002Fp>\n",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":42},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M12 20v2m0-20v2m5 16v2m0-20v2M2 12h2m-2 5h2M2 7h2m16 5h2m-2 5h2M20 7h2M7 20v2M7 2v2\"\u002F>\u003Crect width=\"16\" height=\"16\" x=\"4\" y=\"4\" rx=\"2\"\u002F>\u003Crect width=\"8\" height=\"8\" x=\"8\" y=\"8\" rx=\"1\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":44},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"m16 18l6-6l-6-6M8 6l-6 6l6 6\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":46},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M12 3v14m-7-7h14M5 21h14\"\u002F>",null,1782832598606]