Build a Legal Document Review Agent in Python
Legal teams spend thousands of hours reviewing contracts for the same recurring risk patterns: unfavorable indemnification clauses, missing limitation-of-liability caps, auto-renewal traps, and non-standard IP ownership terms. These reviews are repetitive, time-consuming, and require a high level of attention to detail — making them a strong candidate for AI augmentation.
This tutorial builds a legal document review agent that ingests a contract in PDF or text format, extracts key clauses by category, flags risky provisions with severity scores, generates a structured risk report, and suggests alternative language for problematic sections. The agent is designed as a first-pass screening tool — it reduces the load on human reviewers by surfacing issues before they even open the document.
Important disclaimer: This agent is a legal technology tool, not a substitute for qualified legal advice. Always have a licensed attorney review contracts before signing.
What You'll Learn#
- How to extract and chunk legal text from PDF documents
- How to build clause-specific analysis prompts for accurate legal reasoning
- How to produce structured risk reports with severity scores and remediation suggestions
- How to build a multi-stage review pipeline with escalation logic
- How to handle large contracts that exceed LLM context windows
Prerequisites#
- Python 3.10+
- OpenAI API key (GPT-4o recommended for legal reasoning quality)
- Understanding of LangChain tools and agents
- Familiarity with AI agent concepts and legal AI use cases
Architecture Overview#
The review pipeline has five stages:
- Ingestion — Load and parse the contract PDF into clean text
- Segmentation — Split the text into logical sections (clauses) using headings and structure
- Clause Classification — Identify clause types: indemnification, liability, termination, IP, payment, etc.
- Risk Analysis — Analyze each clause type for risk level and deviation from market standard
- Report Generation — Produce a structured JSON + human-readable report with prioritized findings
Step 1: Install Dependencies#
pip install langchain==0.3.0 langchain-openai==0.2.0 pypdf==5.0.0 \
python-dotenv==1.0.1 pydantic==2.9.0 tiktoken==0.7.0
Step 2: Document Ingestion and Chunking#
Legal contracts often exceed 20,000 tokens. This ingestion layer extracts text, preserves section structure, and creates clause-sized chunks.
# ingestion.py
import re
from pathlib import Path
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
def extract_text_from_pdf(path: str) -> str:
    """Extract raw text from a PDF, preserving page breaks as section hints."""
    # Keep only pages that actually yielded text; scanned/image-only pages
    # return None or "" from extract_text() and are dropped.
    page_texts = [
        extracted.strip()
        for page in PdfReader(path).pages
        if (extracted := page.extract_text())
    ]
    return "\n\n[PAGE BREAK]\n\n".join(page_texts)
def extract_text_from_file(path: str) -> str:
    """Load contract text from a .pdf or .txt file."""
    source = Path(path)
    # Anything that is not a PDF is treated as plain UTF-8 text.
    if source.suffix.lower() != ".pdf":
        return source.read_text(encoding="utf-8")
    return extract_text_from_pdf(path)
def split_into_sections(text: str) -> list[dict]:
    """
    Split contract text into logical sections based on numbered headings.

    Recognized heading forms: dotted numbers ("1.", "1.1"), "Section N" and
    "Article IV" (case-insensitive), and ALL-CAPS labels ending in a colon.

    Returns:
        List of {"heading", "content", "char_start"} dicts; sections with
        trivially short content are skipped.
    """
    # (?i:...) scopes case-insensitivity to the Section/Article alternatives
    # only. A global re.IGNORECASE would make the ALL-CAPS alternative
    # [A-Z][A-Z\s]{3,}: match any lowercase word of 4+ letters before a
    # colon (e.g. "note:"), creating bogus sections.
    heading_pattern = re.compile(
        r'^((?:\d+\.)+\d*|(?i:Section\s+\d+|Article\s+[IVX]+)|[A-Z][A-Z\s]{3,}:)',
        re.MULTILINE,
    )
    matches = list(heading_pattern.finditer(text))
    sections = []
    for i, match in enumerate(matches):
        heading = match.group().strip()
        start = match.end()
        # A section's body runs until the next heading (or end of document).
        end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
        content = text[start:end].strip()
        if len(content) > 50:  # Skip trivially short sections
            sections.append({
                "heading": heading,
                "content": content,
                "char_start": match.start(),
            })
    return sections
def chunk_for_analysis(text: str, max_tokens: int = 3000) -> list[str]:
    """Split text into chunks that fit within the LLM context for analysis."""
    # Rough heuristic: one token is about four characters of English text.
    approx_chars = max_tokens * 4
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=approx_chars,
        chunk_overlap=200,
        separators=["\n\n", "\n", ". "],
    )
    return splitter.split_text(text)
Step 3: Clause Classification#
# classifier.py
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from typing import Optional
# Closed vocabulary of clause categories the classifier may assign.
# "other" is the catch-all for sections that fit none of the named types.
# The list is joined into the ClauseIdentification field description, so
# entries here directly steer the LLM's choices.
CLAUSE_TYPES = [
    "indemnification", "limitation_of_liability", "termination",
    "intellectual_property", "payment_terms", "confidentiality",
    "governing_law", "dispute_resolution", "warranty", "auto_renewal",
    "data_privacy", "force_majeure", "assignment", "other",
]
class ClauseIdentification(BaseModel):
    """Structured output schema for classifying one contract section.

    The Field descriptions below are sent to the model as part of the
    structured-output schema and steer what it returns.
    """

    # Expected to be one of CLAUSE_TYPES (enforced only via the prompt).
    clause_type: str = Field(description=f"One of: {', '.join(CLAUSE_TYPES)}")
    relevance_score: float = Field(ge=0.0, le=1.0, description="How central this clause is to the section (0-1)")
    party_benefiting: str = Field(description="Which party this clause primarily favors: 'provider', 'customer', 'neutral', 'unclear'")
    summary: str = Field(description="One sentence plain-English summary of what this section does")
# Temperature 0 so identical sections classify identically across runs.
classifier_llm = ChatOpenAI(model="gpt-4o", temperature=0)

# Prompt kept as a named intermediate, then piped into the model with
# schema-validated (structured) output.
_classify_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a legal contract analyst.
Classify this contract section. Be precise about clause type.
Identify which contracting party benefits from this clause."""),
    ("human", "Section heading: {heading}\n\nContent:\n{content}"),
])
classify_chain = _classify_prompt | classifier_llm.with_structured_output(ClauseIdentification)
def classify_section(heading: str, content: str) -> ClauseIdentification:
    """Classify one section, truncating content to bound prompt size."""
    payload = {"heading": heading, "content": content[:2000]}
    return classify_chain.invoke(payload)
Step 4: Risk Analysis Engine#
# risk_analyzer.py
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from typing import Optional
# System prompt for the risk chain. Analysis is framed from the CUSTOMER
# perspective, and the five risk levels listed here must stay in sync with
# the values documented on ClauseRiskReport.risk_level.
RISK_ANALYSIS_SYSTEM = """You are a senior commercial contracts attorney with 20 years of experience.
Analyze this contract clause for risk from the CUSTOMER perspective.
Risk levels:
- CRITICAL: Clause that could result in unlimited liability, IP loss, or unenforceable terms
- HIGH: Significantly unfavorable terms that deviate from market standard
- MEDIUM: Moderately unfavorable or ambiguous terms worth negotiating
- LOW: Minor issues or standard terms that could be improved
- ACCEPTABLE: Market-standard terms with no significant concern
Always compare to market standard. Be specific about what makes a clause risky."""
class ClauseRiskReport(BaseModel):
    """Structured output schema for the risk analysis of one clause."""

    # One of the five levels enumerated in RISK_ANALYSIS_SYSTEM.
    risk_level: str = Field(description="CRITICAL, HIGH, MEDIUM, LOW, or ACCEPTABLE")
    risk_score: float = Field(ge=0.0, le=10.0, description="Numeric risk score (10 = highest risk)")
    issues: list[str] = Field(description="List of specific issues identified in this clause")
    market_deviation: str = Field(description="How this clause deviates from market standard")
    # Per the description, expected only for MEDIUM-or-higher risk clauses;
    # None otherwise.
    suggested_language: Optional[str] = Field(
        default=None,
        description="Suggested alternative language for MEDIUM+ risk clauses",
    )
    negotiation_priority: str = Field(description="must_fix, should_negotiate, or nice_to_have")
# Temperature 0 keeps risk assessments reproducible run to run.
risk_llm = ChatOpenAI(model="gpt-4o", temperature=0)

_risk_prompt = ChatPromptTemplate.from_messages([
    ("system", RISK_ANALYSIS_SYSTEM),
    ("human", "Clause type: {clause_type}\n\nClause text:\n{content}"),
])
risk_chain = _risk_prompt | risk_llm.with_structured_output(ClauseRiskReport)
# Clause-specific risk templates for higher accuracy.
# Keys are clause_type values produced by the classifier; each value is
# prepended to the clause text before risk analysis to focus the prompt on
# the pitfalls specific to that clause type. Clause types without an entry
# are analyzed with the generic system prompt alone.
CLAUSE_CONTEXT = {
    "indemnification": "Focus on: breadth of indemnified claims, IP infringement carveouts, mutual vs unilateral indemnification, uncapped indemnification.",
    "limitation_of_liability": "Focus on: presence/absence of cap, cap amount relative to fees, excluded claim types, consequential damage waivers.",
    "intellectual_property": "Focus on: work-for-hire provisions, license scope, background IP, data ownership, AI/ML training clauses.",
    "auto_renewal": "Focus on: renewal notice period, cancellation window, price escalation on renewal.",
    "termination": "Focus on: termination for cause definitions, cure periods, termination for convenience, effect of termination on licenses.",
}
def analyze_clause_risk(clause_type: str, content: str) -> ClauseRiskReport:
    """Run the risk chain on one clause, prepending type-specific guidance."""
    guidance = CLAUSE_CONTEXT.get(clause_type, "")
    text = content if not guidance else f"{guidance}\n\n{content}"
    # Truncate to keep the prompt within budget.
    return risk_chain.invoke({"clause_type": clause_type, "content": text[:2500]})
Step 5: Report Generator#
# report_generator.py
import json
from datetime import datetime
from dataclasses import dataclass, field
from classifier import ClauseIdentification
from risk_analyzer import ClauseRiskReport
@dataclass
class ClauseReview:
    """One reviewed section: its heading plus classification and risk results."""

    heading: str  # Section heading as extracted from the document
    classification: ClauseIdentification  # Output of classify_section
    risk: ClauseRiskReport  # Output of analyze_clause_risk
@dataclass
class ContractReviewReport:
    """Aggregated review results for one contract, with report rendering."""

    document_name: str
    review_date: str
    # Count of sections submitted for analysis (not necessarily all of them
    # succeeded; see clause_reviews for the successful ones).
    total_clauses_analyzed: int
    clause_reviews: list["ClauseReview"] = field(default_factory=list)

    @property
    def critical_issues(self) -> list["ClauseReview"]:
        """Clauses the risk analyzer flagged CRITICAL."""
        return [c for c in self.clause_reviews if c.risk.risk_level == "CRITICAL"]

    @property
    def high_risk_issues(self) -> list["ClauseReview"]:
        """Clauses the risk analyzer flagged HIGH."""
        return [c for c in self.clause_reviews if c.risk.risk_level == "HIGH"]

    @property
    def overall_risk_score(self) -> float:
        """Mean clause risk score rounded to one decimal; 0.0 when empty."""
        if not self.clause_reviews:
            return 0.0
        return round(sum(c.risk.risk_score for c in self.clause_reviews) / len(self.clause_reviews), 1)

    def to_markdown(self) -> str:
        """Render the report as human-readable Markdown.

        Must-fix findings are listed first, ordered by descending risk
        score so the highest-risk items lead the report, followed by a
        one-line summary of every analyzed clause.
        """
        lines = [
            f"# Contract Review Report: {self.document_name}",
            f"**Date**: {self.review_date}",
            f"**Overall Risk Score**: {self.overall_risk_score}/10",
            f"**Critical Issues**: {len(self.critical_issues)}",
            f"**High Risk Issues**: {len(self.high_risk_issues)}",
            "",
            "---",
            "",
            "## Priority Issues (Must Fix)",
        ]
        must_fix = [c for c in self.clause_reviews if c.risk.negotiation_priority == "must_fix"]
        # Prioritize findings: highest risk score first, so the report leads
        # with what matters most (rather than document order).
        must_fix.sort(key=lambda c: c.risk.risk_score, reverse=True)
        for item in must_fix:
            lines.extend([
                f"### [{item.risk.risk_level}] {item.heading}",
                f"**Type**: {item.classification.clause_type}",
                f"**Risk Score**: {item.risk.risk_score}/10",
                "",
                "**Issues**:",
                *[f"- {issue}" for issue in item.risk.issues],
                "",
                f"**Market Deviation**: {item.risk.market_deviation}",
            ])
            if item.risk.suggested_language:
                lines.extend(["", f"**Suggested Language**: {item.risk.suggested_language}"])
            lines.append("")
        lines.extend(["---", "", "## Full Clause Analysis"])
        for item in self.clause_reviews:
            lines.extend([
                f"### {item.heading}",
                f"Type: {item.classification.clause_type} | Risk: {item.risk.risk_level} ({item.risk.risk_score}/10)",
                f"Summary: {item.classification.summary}",
                "",
            ])
        return "\n".join(lines)
Step 6: The Review Pipeline#
# pipeline.py
from ingestion import extract_text_from_file, split_into_sections
from classifier import classify_section
from risk_analyzer import analyze_clause_risk
from report_generator import ContractReviewReport, ClauseReview
from datetime import datetime
def review_contract(file_path: str, max_clauses: int = 20) -> ContractReviewReport:
    """Run the full contract review pipeline on a document.

    Args:
        file_path: Path to a .pdf or .txt contract.
        max_clauses: Cap on the number of sections analyzed per run
            (cost control for LLM calls). Defaults to 20, matching the
            original behavior.

    Returns:
        ContractReviewReport with one ClauseReview per successfully
        analyzed section. Sections that fail analysis are skipped with a
        warning rather than aborting the whole review.
    """
    from pathlib import Path  # local import: keeps module-level deps unchanged

    print(f"[1/4] Loading document: {file_path}")
    text = extract_text_from_file(file_path)

    print("[2/4] Segmenting into clauses...")
    sections = split_into_sections(text)
    print(f"    Found {len(sections)} sections")

    to_analyze = sections[:max_clauses]
    report = ContractReviewReport(
        # Path(...).name is portable; splitting on "/" breaks on Windows paths.
        document_name=Path(file_path).name,
        review_date=datetime.now().strftime("%Y-%m-%d"),
        # Report the number actually analyzed, not the number found —
        # the cap means these can differ on long contracts.
        total_clauses_analyzed=len(to_analyze),
    )

    print("[3/4] Analyzing each clause...")
    for i, section in enumerate(to_analyze):
        print(f"    Analyzing section {i+1}/{len(to_analyze)}: {section['heading'][:50]}")
        try:
            classification = classify_section(section["heading"], section["content"])
            risk = analyze_clause_risk(classification.clause_type, section["content"])
            report.clause_reviews.append(ClauseReview(
                heading=section["heading"],
                classification=classification,
                risk=risk,
            ))
        except Exception as e:
            # Best-effort: surface the failure but keep reviewing the rest.
            print(f"    Warning: Could not analyze section '{section['heading']}': {e}")

    print("[4/4] Generating report...")
    return report
# Example usage
if __name__ == "__main__":
import sys
if len(sys.argv) < 2:
print("Usage: python pipeline.py <contract.pdf>")
sys.exit(1)
report = review_contract(sys.argv[1])
print("\n" + "="*60)
print(report.to_markdown())
print(f"\nReview complete. {len(report.critical_issues)} critical issues found.")
For production deployments, add Langfuse observability to track analysis quality, and integrate human-in-the-loop approval workflows so attorneys can confirm or override agent findings. To package the service for deployment, follow the Docker deployment guide.
What's Next#
- Explore legal AI agent use cases for more application patterns
- Add semantic clause comparison with agentic RAG to compare against a library of standard clauses
- Build a human-in-the-loop review workflow for attorney sign-off on agent findings
- Review AI agent security best practices for handling confidential legal documents
- Add tracing with Langfuse to audit every review for quality assurance