LangChain 框架大项目用起来有多痛苦？本文整理了一套工程化方案。前言：“老王，为什么我们的 LangChain 服务一重启就丢记忆？”全栈工程师小李一脸无奈。我看了看他的代码，发现他用的是默认的 BufferMemory。“你这是把记忆存在内存里啊，服务一重启当然就没了。”“那应该怎么弄？”看来得从 LangChain 的工程化讲起了。今天我们聊聊 LangChain 长期记忆的工程化方案。一、底层原理 1.1 LangChain 的长期记忆困境：LangChain 的核心是把 LLM 调用链式编排，但长期记忆缺乏工程化支持。graph TD A[用户输入] --> B[LangChain Chain] B --> C[LLM 调用] C --> D{有记忆吗} D -->|有| E[ConversationBufferMemory] D -->|无| F[每次都重新来] E --> G[内存中累积] G --> H[Token 超限] H --> I[崩溃] F --> J[用户体验差] 核心问题：ConversationBufferMemory 无限增长；没有自动压缩机制；状态丢失无法恢复；prompt 版本难管理。1.2 LangChain 长期记忆方案对比：方案 | 容量 | 持久化 | 复杂度；BufferMemory | 小 | 否 | 低；SummaryMemory | 中 | 否 | 中；向量存储记忆 | 大 | 是 | 高；自定义记忆 | 可控 | 是 | 最高。二、快速上手：看默认的 BufferMemory 问题：from langchain.memory import ConversationBufferMemory memory ConversationBufferMemory() memory.chat_memory.add_user_message(你好) memory.chat_memory.add_ai_message(你好) # 对话越长memory 越大... 改进版，加摘要：from langchain.memory import ConversationSummaryMemory from langchain.llms import OpenAI llm OpenAI() memory ConversationSummaryMemory(llmllm) memory.save_context({input: 你好}, {output: 你好}) memory.save_context({input: 今天天气怎么样}, {output: 今天是晴天。}) # 自动总结 print(memory.load_memory_variables({})) 三、核心 API / 深水区 3.1 LangChain 记忆方案速查：记忆类型 | 特点 | 适合场景；BufferMemory | 保存所有 | 短对话；SummaryMemory | 压缩总结 | 中长对话；VectorStoreMemory | 向量检索 | 知识问答；CombinedMemory | 混合使用 | 复杂场景。3.2 自定义持久化记忆：import json import os from langchain.memory import BaseMemory class FilePersistedMemory(BaseMemory): def __init__(self, file_pathmemory.json): self.file_path file_path self.memory self._load() def _load(self): if os.path.exists(self.file_path): with open(self.file_path) as f: return json.load(f) return {history: []} def save(self): with open(self.file_path, w) as f: json.dump(self.memory, f, ensure_asciiFalse) property def memory_variables(self): return [history] def load_memory_variables(self, inputs): return self.memory def save_context(self, inputs, outputs): self.memory[history].append({ input: inputs.get(input, ), output: outputs.get(output, ) }) self.save() if len(self.memory[history]) 100: self._compress() def _compress(self): self.memory[history] 
self.memory[history][-50:]3.3 多轮记忆管理from typing import Dict, List, Any from dataclasses import dataclass dataclass class TurnContext: turn: int user_input: str assistant_output: str retrieved_docs: List[str] class MultiTurnMemory: def __init__(self, llm, max_turns20): self.llm llm self.max_turns max_turns self.turns: List[TurnContext] [] def add_turn(self, user_input: str, output: str, docsNone): self.turns.append(TurnContext( turnlen(self.turns) 1, user_inputuser_input, assistant_outputoutput, retrieved_docsdocs or [] )) if len(self.turns) self.max_turns: self._summarize_old() def _summarize_old(self): old self.turns[:5] text \n.join(fU: {t.user_input}\nA: {t.assistant_output} for t in old) prompt f总结前五轮对话\n{text} summary self.llm(prompt) self.turns self.turns[5:] print(f历史摘要{summary}) def get_context(self) - str: recent self.turns[-5:] return \n.join( f用户{t.user_input}\n助手{t.assistant_output} for t in recent )四、实战演练完整工程化 LangChain 记忆系统from typing import Dict, List, Optional import json import time class EngineeringMemory: def __init__(self, llm, persist_pathmemory.json): self.llm llm self.persist_path persist_path self.short_term: List[Dict] [] self.long_term: List[Dict] [] self.summaries: List[str] [] self._load() def add_interaction(self, user_input: str, output: str): interaction { user: user_input, assistant: output, timestamp: time.time() } self.short_term.append(interaction) self._balance() self._save() def _balance(self): if len(self.short_term) 20: old_interactions self.short_term[:-10] if old_interactions: text \n.join( fU: {i[user]}\nA: {i[assistant]} for i in old_interactions ) prompt f总结对话{text[:1500]} summary self.llm(prompt) self.summaries.append(summary) self.short_term self.short_term[-10:] def get_memory_context(self) - str: parts [] if self.summaries: parts.append(【历史摘要】) parts.extend(self.summaries[-3:]) if self.short_term: parts.append(【最近对话】) for i in self.short_term[-5:]: parts.append(f用户{i[user]}) parts.append(f助手{i[assistant]}) return 
\n.join(parts) def _save(self): data { short_term: self.short_term, summaries: self.summaries } with open(self.persist_path, w) as f: json.dump(data, f, ensure_asciiFalse) def _load(self): try: with open(self.persist_path) as f: data json.load(f) self.short_term data.get(short_term, []) self.summaries data.get(summaries, []) except: pass memory EngineeringMemory(llm) # 模拟 50 轮对话 for i in range(50): memory.add_interaction(f第{i}次提问, f第{i}次回复) print(memory.get_memory_context()) 五、避坑指南与最佳实践 **技巧**：做好持久化。进程重启后记忆丢失，一定要持久化。⚠️ **警告**：BufferMemory 不要用太多，对话一长 Token 就爆了。✅ **推荐**：分层记忆，短期 + 短期摘要 + 长期三级架构。六、综合实战：演示生产级 LangChain 记忆系统：from typing import Dict, List, Optional from datetime import datetime import json class LangChainPersistentMemory: def __init__(self, llm, user_id: str): self.llm llm self.user_id user_id self.short_memory: List[Dict] [] self.long_memory: Dict[str, str] {} self.last_summary_time 0 def add(self, role: str, content: str): msg { role: role, content: content, time: datetime.now().isoformat() } self.short_memory.append(msg) if len(self.short_memory) 10: self._compress() def _compress(self): to_compress self.short_memory[:-5] if not to_compress: return text \n.join(m[content] for m in to_compress) summary self.llm(f总结{text}) key fsummary_{len(self.long_memory)} self.long_memory[key] summary self.short_memory self.short_memory[-5:] def get_context(self, token_limit3000) - str: parts [] # 长期记忆 for k, v in self.long_memory.items(): part f[历史] {v} if sum(len(p) for p in parts) len(part) token_limit: break parts.append(part) # 短期记忆 for m in self.short_memory: part f{m[role]}: {m[content]} parts.append(part) return \n.join(parts) def to_dict(self) - Dict: return { user_id: self.user_id, short: self.short_memory, long: self.long_memory } memory LangChainPersistentMemory(llm, user_001) for i in range(15): memory.add(user, f问题{i}) memory.add(assistant, f回答{i}) print(memory.get_context()) 七、总结：LangChain 的长期记忆工程化要点：不要只用 BufferMemory，它会无限增长，最终导致 Token 超限；自动压缩 + 
摘要：定期总结历史对话，控制上下文大小；持久化存储：进程重启后记忆不丢失；分层记忆架构：短期记忆 + 短期摘要 + 长期记忆。搞好了这些，LangChain 项目才能在真实场景稳定运行。