diff --git a/tamingllms/_build/.doctrees/environment.pickle b/tamingllms/_build/.doctrees/environment.pickle index 31c0d87..b41e8a4 100644 Binary files a/tamingllms/_build/.doctrees/environment.pickle and b/tamingllms/_build/.doctrees/environment.pickle differ diff --git a/tamingllms/_build/.doctrees/markdown/intro.doctree b/tamingllms/_build/.doctrees/markdown/intro.doctree index c613244..ff5ecf3 100644 Binary files a/tamingllms/_build/.doctrees/markdown/intro.doctree and b/tamingllms/_build/.doctrees/markdown/intro.doctree differ diff --git a/tamingllms/_build/html/_sources/markdown/intro.md b/tamingllms/_build/html/_sources/markdown/intro.md index 8166f6b..cc89eaf 100644 --- a/tamingllms/_build/html/_sources/markdown/intro.md +++ b/tamingllms/_build/html/_sources/markdown/intro.md @@ -62,10 +62,11 @@ The current discourse around LLMs tends toward extremes—either uncritical enth ## Who This Book Is For -This book is designed for: +This book is intended for software developers taking their first steps with Large Language Models. It provides critical insights into the practical challenges of LLM implementation, along with guidance on leveraging open source tools and frameworks to avoid common pitfalls that could derail projects. The goal is to help developers understand and address these challenges early, before they become costly problems late in the software development lifecycle. -- Software Engineers building LLM-powered applications -- Product Managers leading AI initiatives +A broader audience for this book includes: + +- Technical Product Managers leading AI initiatives - Technical Leaders making architectural decisions - Anyone seeking to understand the practical challenges of working with LLMs diff --git a/tamingllms/_build/html/markdown/intro.html b/tamingllms/_build/html/markdown/intro.html index 51a7f61..3af431c 100644 --- a/tamingllms/_build/html/markdown/intro.html +++ b/tamingllms/_build/html/markdown/intro.html @@ -262,10 +262,10 @@

1.4. Who This Book Is For

-

This book is designed for:

+

This book is intended for software developers taking their first steps with Large Language Models. It provides critical insights into the practical challenges of LLM implementation, along with guidance on leveraging open source tools and frameworks to avoid common pitfalls that could derail projects. The goal is to help developers understand and address these challenges early, before they become costly problems late in the software development lifecycle.

+

A broader audience for this book includes:

diff --git a/tamingllms/_build/html/searchindex.js b/tamingllms/_build/html/searchindex.js index 63369ea..2315b10 100644 --- a/tamingllms/_build/html/searchindex.js +++ b/tamingllms/_build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["markdown/intro", "markdown/toc", "notebooks/evals", "notebooks/output_size_limit", "notebooks/structured_output"], "filenames": ["markdown/intro.md", "markdown/toc.md", "notebooks/evals.ipynb", "notebooks/output_size_limit.ipynb", "notebooks/structured_output.ipynb"], "titles": ["1. Introduction", "Taming Large Language Models with Open Source Software", "4. Challenges of Evaluating LLM-based Applications", "2. Output Size Limitations", "3. Wrestling with Structured Output"], "terms": {"am": 0, "alwai": [0, 2, 4], "do": [0, 2, 3, 4], "which": [0, 2, 3, 4], "cannot": [0, 2], "order": [0, 2, 4], "mai": [0, 2, 3, 4], "learn": [0, 2, 4], "how": [0, 2, 3, 4], "pablo": 0, "picasso": 0, "In": [0, 2, 3, 4], "recent": [0, 4], "year": [0, 2, 3, 4], "larg": [0, 2, 3, 4], "languag": [0, 2, 3, 4], "model": 0, "llm": [0, 3], "have": [0, 2, 3, 4], "emerg": [0, 1], "transform": [0, 2, 4], "forc": [0, 2], "technologi": [0, 2, 3, 4], "promis": 0, "revolution": 0, "build": [0, 1, 2, 3], "product": [0, 2, 4], "interact": [0, 2, 3, 4], "comput": [0, 2, 3], "from": [0, 2, 3, 4], "chatgpt": [0, 2], "github": [0, 4], "copilot": 0, "claud": [0, 2, 3], "artifact": 0, "cursor": 0, "com": [0, 4], "replit": 0, "other": [0, 2, 3, 4], "system": [0, 2, 3, 4], "captur": [0, 2], "public": 0, "imagin": 0, "spark": 0, "gold": 0, "rush": 0, "ai": [0, 4], "power": [0, 2, 3, 4], "applic": [0, 3, 4], "howev": [0, 2, 3, 4], "beneath": 0, "surfac": [0, 2], "technolog": 0, "revolut": 0, "li": 0, "complex": [0, 2, 3, 4], "landscap": [0, 2], "practition": 0, "must": [0, 2, 3], "navig": 0, "As": [0, 2, 3, 4], "explor": [0, 2, 4], "engin": [0, 2, 4], "effort": [0, 4], "requir": [0, 2, 3, 4], "manag": [0, 1, 2, 3, 4], "handl": [0, 1, 2, 3, 4], "non": [0, 1, 4], 
"determinist": [0, 1], "output": [0, 2], "prevent": [0, 2, 4], "hallucin": [0, 4], "overst": 0, "while": [0, 2, 3, 4], "potenti": [0, 2, 3, 4], "remain": [0, 2, 3], "compel": [0, 4], "understand": [0, 1, 2, 3, 4], "hidden": 0, "cost": [0, 2, 4], "reliabl": [0, 2, 4], "enabl": [0, 2, 3, 4], "u": [0, 2, 4], "fulli": [0, 2, 3], "har": [0, 3], "impact": [0, 2, 3, 4], "fortun": 0, "grow": [0, 2, 4], "ecosystem": 0, "open": [0, 2, 3, 4], "sourc": [0, 2, 4], "solut": [0, 1, 2, 3], "best": [0, 1, 2], "help": [0, 2, 3, 4], "tackl": 0, "limit": [0, 2, 4], "rang": [0, 2, 3, 4], "famili": 0, "llama": [0, 1], "mistral": 0, "framework": [0, 2, 4], "ollama": [0, 1], "langchain": [0, 1, 3], "packag": 0, "outlin": [0, 1, 2], "promptfoo": 0, "format": [0, 2, 3, 4], "llamafil": [0, 1], "txt": [0, 2, 3, 4], "hub": 0, "like": [0, 2, 3, 4], "huggingfac": [0, 2], "equip": 0, "battl": 0, "test": [0, 1, 2], "tool": [0, 3], "overcom": [0, 3], "develop": [0, 2, 3, 4], "launch": [0, 2], "robust": [0, 2, 3, 4], "base": [0, 4], "capabl": [0, 2, 3, 4], "ar": [0, 1, 2, 4], "inde": 0, "remark": 0, "prevail": 0, "narr": 0, "often": [0, 2, 3, 4], "gloss": 0, "over": [0, 2, 3, 4], "fundament": [0, 2], "problem": [0, 1], "organ": [0, 2, 3], "face": [0, 4], "when": [0, 2, 3, 4], "real": [0, 2, 3, 4], "world": [0, 2, 4], "aim": [0, 3, 4], "bridg": 0, "gap": 0, "offer": [0, 2, 4], "clear": [0, 2, 4], "ei": 0, "examin": [0, 3], "pitfal": [0, 1], "work": [0, 1, 2, 3, 4], "provid": [0, 2, 3], "them": [0, 2, 3, 4], "throughout": [0, 3, 4], "follow": [0, 2, 3, 4], "exhaust": 0, "list": [0, 2, 3, 4], "critic": [0, 2, 3, 4], "behavior": [0, 2], "unlik": [0, 2], "tradit": 0, "softwar": [0, 4], "can": [0, 2, 3, 4], "produc": [0, 2, 4], "differ": [0, 2, 3, 4], "ident": [0, 2], "input": [0, 2, 3, 4], "make": [0, 2, 3, 4], "assur": 0, "particularli": [0, 2, 3, 4], "structur": [0, 2, 3], "un": 0, "struggl": [0, 4], "maintain": [0, 2, 3, 4], "consist": [0, 2, 3, 4], "complic": 0, "integr": [0, 2, 4], "larger": [0, 2, 
3], "error": [0, 4], "more": [0, 2, 3, 4], "These": [0, 2, 3], "gener": [0, 1], "plausibl": 0, "sound": 0, "entir": [0, 2, 3], "fabric": 0, "inform": [0, 2, 3, 4], "creat": [0, 2, 3, 4], "signific": [0, 2, 3, 4], "risk": [0, 2, 3], "optim": [0, 1, 3], "The": [0, 3], "financi": [0, 2, 3, 4], "oper": [0, 2, 3], "quickli": [0, 3], "becom": [0, 2], "prohibit": 0, "without": [0, 2, 3, 4], "care": [0, 2, 4], "methodologi": [0, 4], "break": [0, 2, 3], "down": [0, 2, 3], "deal": 0, "new": [0, 2, 3, 4], "take": [0, 2, 3, 4], "hand": [0, 3, 4], "concret": 0, "exampl": [0, 1], "you": [0, 2, 3, 4], "run": [0, 2, 4], "modifi": 0, "scenario": [0, 2], "strategi": [0, 1, 2, 3], "techniqu": [0, 1, 2, 3], "pattern": [0, 1, 2, 4], "anti": 0, "look": [0, 2], "our": [0, 2, 3, 4], "goal": [0, 3], "discourag": 0, "us": [0, 2, 3, 4], "implement": [0, 1, 2, 3, 4], "By": [0, 3, 4], "upfront": 0, "better": [0, 3], "leverag": [0, 2, 3, 4], "effect": [0, 2, 3, 4], "avoid": [0, 2, 4], "current": [0, 3], "discours": 0, "around": [0, 3, 4], "tend": [0, 2], "toward": [0, 2], "extrem": 0, "either": [0, 3], "uncrit": 0, "enthusiasm": 0, "wholesal": 0, "dismiss": 0, "focu": [0, 2, 3], "rather": [0, 2], "than": [0, 2], "theoret": 0, "first": [0, 2, 3], "everi": 0, "concept": 0, "illustr": [0, 2, 3], "execut": [0, 2], "immedi": 0, "analysi": [0, 1, 2, 3], "balanc": [0, 2, 3, 4], "both": [0, 2], "reader": 0, "decis": [0, 4], "design": [0, 1, 3, 4], "lead": [0, 2, 3, 4], "initi": [0, 2, 3], "technic": [0, 2, 3], "leader": 0, "architectur": [0, 3], "anyon": 0, "seek": 0, "typic": [0, 2, 3], "job": 0, "role": [0, 2, 3, 4], "platform": [0, 3], "backend": 0, "exist": [0, 2], "ml": 0, "transit": [0, 2, 3], "overse": 0, "genai": 0, "motiv": 0, "need": [0, 2, 3, 4], "readi": [0, 2], "desir": [0, 2, 4], "perform": [0, 1, 2, 3, 4], "ensur": [0, 2, 3, 4], "safeti": [0, 4], "after": [0, 3], "read": [0, 2, 3, 4], "implic": [0, 1, 2], "experi": [0, 2, 3, 4], "recommend": [0, 3, 4], "abl": [0, 3, 4], "deploi": [0, 3], 
"proper": [0, 4], "safeguard": 0, "realist": 0, "estim": 0, "project": 0, "timelin": 0, "To": [0, 2, 3, 4], "most": [0, 2, 3, 4], "should": [0, 2, 3, 4], "basic": [0, 2, 3], "program": [0, 2], "knowledg": [0, 2], "introductori": 0, "e": [0, 2, 3, 4], "g": [0, 2, 3, 4], "chat": [0, 2, 3, 4], "prompt": [0, 1, 2], "templat": [0, 1, 2], "access": [0, 4], "openai": [0, 2, 4], "anthrop": [0, 4], "similar": [0, 2, 4], "grade": 0, "befor": 0, "dive": 0, "here": [0, 2, 3, 4], "get": [0, 2, 3, 4], "start": 0, "activ": [0, 2], "virtual": 0, "m": 0, "venv": 0, "env": [0, 2, 3, 4], "bin": 0, "On": [0, 4], "window": [0, 1], "script": 0, "instal": [0, 2, 4], "pip": [0, 2, 4], "r": [0, 2, 3, 4], "file": [0, 3, 4], "root": 0, "directori": 0, "add": [0, 3], "sensit": [0, 2], "openai_api_kei": 0, "your_openai_api_key_her": 0, "never": 0, "share": [0, 2, 4], "commit": 0, "version": [0, 2, 4], "control": [0, 2, 4], "It": [0, 2, 3, 4], "contain": [0, 2, 3], "kept": 0, "privat": 0, "clone": 0, "companion": 0, "git": 0, "http": [0, 2, 4], "souzatharsi": 0, "tamingllm": 0, "cd": 0, "If": [0, 2, 4], "encount": 0, "rate": [0, 2], "consid": [0, 2, 3, 4], "smaller": [0, 2, 3, 4], "retri": [0, 4], "logic": [0, 2, 3], "conflict": 0, "try": [0, 2, 4], "fresh": 0, "poetri": 0, "check": [0, 2], "page": 0, "known": [0, 2, 4], "now": [0, 2, 3, 4], "let": [0, 2, 3, 4], "begin": 0, "dr": 0, "tharsi": 0, "souza": 0, "scientist": 0, "special": [0, 2, 4], "he": 0, "lectur": 0, "columbia": 0, "univers": 0, "master": 0, "scienc": [0, 2], "appli": [0, 2, 3], "analyt": 0, "head": [0, 3], "equiti": 0, "citadel": 0, "former": [0, 2], "senior": 0, "vp": 0, "two": [0, 2, 3, 4], "sigma": 0, "invest": [0, 2, 4], "With": 0, "15": [0, 2, 4], "deliv": 0, "across": [0, 2], "startup": 0, "500": 0, "compani": [0, 2, 3, 4], "global": 0, "also": [0, 2, 3, 4], "an": [0, 2, 3, 4], "numer": [0, 2], "scholarli": 0, "frequent": 0, "speaker": 0, "academ": 0, "busi": [0, 2], "confer": 0, "ground": [0, 1, 2], "background": [0, 3], 
"draw": 0, "scale": [0, 2, 4], "earli": 0, "stage": 0, "major": [0, 4], "institut": 0, "well": [0, 2, 4], "advis": 0, "profit": [0, 3, 4], "contribut": [0, 3], "bring": 0, "uniqu": [0, 2], "between": [0, 2, 3], "next": [0, 2, 4], "hold": 0, "ph": 0, "d": [0, 4], "ucl": 0, "london": 0, "phil": 0, "sc": 0, "b": [0, 4], "practic": [1, 2, 3], "guid": [1, 2, 4], "python": [1, 4], "core": [1, 2], "we": [1, 2, 3, 4], "ll": [1, 2], "address": [1, 2, 3, 4], "approach": [1, 2, 3, 4], "note": [1, 2, 3, 4], "perspect": 1, "who": [1, 3], "thi": [1, 2, 3, 4], "book": 1, "i": [1, 2, 3, 4], "For": [1, 2, 3, 4], "outcom": [1, 2], "prerequisit": 1, "set": [1, 2, 3, 4], "up": [1, 2, 3, 4], "your": [1, 3, 4], "environ": [1, 2, 3, 4], "setup": [1, 2], "api": [1, 2], "kei": [1, 4], "configur": [1, 2], "code": [1, 2, 4], "repositori": 1, "troubleshoot": 1, "common": [1, 2, 3, 4], "issu": [1, 2, 3, 4], "about": [1, 2, 3, 4], "author": 1, "": [1, 2, 3, 4], "statement": 1, "One": [1, 2], "shot": 1, "specif": [1, 2, 3], "json": 1, "mode": 1, "multipl": [1, 2, 3], "choic": [1, 2], "pydant": [1, 2], "discuss": [1, 2], "compar": [1, 2, 3], "research": [1, 2, 3, 4], "ongo": 1, "debat": 1, "conclus": [1, 2], "acknowledg": 1, "refer": 1, "context": [1, 2, 3, 4], "constraint": [1, 3], "long": [1, 2], "token": [1, 2, 4], "chunk": 1, "form": [1, 2, 4], "content": 1, "what": [1, 2, 4], "contextu": [1, 2], "link": 1, "step": [1, 2, 4], "write": [1, 2], "construct": 1, "dynam": [1, 2], "paramet": [1, 2, 4], "report": [1, 2, 4], "usag": [1, 2], "futur": [1, 2], "consider": [1, 4], "machin": 1, "temperatur": [1, 3], "sampl": [1, 3, 4], "spectrum": 1, "properti": 1, "eval": 1, "conceptu": [1, 4], "overview": [1, 4], "compon": 1, "metric": 1, "human": [1, 3, 4], "leaderboard": 1, "type": [1, 2, 3, 4], "detect": [1, 4], "retriev": 1, "augment": [1, 2], "rag": 1, "select": [1, 2], "index": [1, 3], "vector": 1, "store": [1, 3], "method": [1, 2, 3, 4], "pipelin": 1, "valid": [1, 2, 4], "guard": 1, "filter": 1, 
"sanit": 1, "monitor": 1, "alert": 1, "cach": 1, "invalid": [1, 4], "predict": [1, 2, 4], "vendor": [1, 2], "lock": 1, "self": 1, "host": 1, "deploy": 1, "migrat": 1, "commun": 1, "surprisingli": 2, "all": [2, 3, 4], "greg": 2, "brockman": 2, "presid": 2, "natur": [2, 3, 4], "where": [2, 3], "same": [2, 3, 4], "respons": [2, 3, 4], "each": [2, 3], "time": [2, 3, 4], "thei": [2, 3, 4], "re": [2, 3, 4], "queri": 2, "even": [2, 3, 4], "data": [2, 3, 4], "characterist": 2, "strength": 2, "ask": [2, 4], "ani": [2, 3, 4], "question": [2, 4], "isn": 2, "t": [2, 3, 4], "bug": 2, "featur": [2, 4], "random": 2, "allow": [2, 3, 4], "creativ": [2, 4], "divers": [2, 3, 4], "incredibli": 2, "difficult": 2, "testabl": 2, "servic": [2, 3, 4], "advic": 2, "mean": [2, 3, 4], "market": [2, 3, 4], "could": [2, 3], "yield": 2, "exceedingli": 2, "regulatori": 2, "complianc": [2, 4], "guarante": [2, 4], "user": [2, 3, 4], "trust": 2, "affect": 2, "inconsist": [2, 4], "primari": 2, "determin": [2, 3, 4], "come": [2, 3, 4], "dure": 2, "text": [2, 3, 4], "calcul": 2, "probabl": [2, 4], "distribut": [2, 4], "nucleu": 2, "coher": [2, 3], "0": [2, 3, 4], "repetit": [2, 3], "1": [2, 4], "increas": [2, 3, 4], "incoher": 2, "dotenv": [2, 3, 4], "import": [2, 3, 4], "load_dotenv": [2, 3, 4], "o": [2, 3, 4], "load": [2, 3, 4], "variabl": [2, 3, 4], "panda": 2, "pd": 2, "def": [2, 3, 4], "generate_respons": 2, "model_nam": [2, 3], "str": [2, 3, 4], "float": [2, 3], "attempt": [2, 3], "int": [2, 3], "3": [2, 4], "datafram": 2, "demonstr": [2, 3, 4], "client": [2, 4], "result": [2, 3, 4], "temp": 2, "complet": [2, 3, 4], "messag": [2, 4], "max_token": 2, "50": 2, "append": [2, 3], "displai": 2, "group": [2, 3], "df_result": 2, "print": [2, 3, 4], "f": [2, 3, 4], "ntemperatur": 2, "40": 2, "temp_respons": 2, "_": 2, "row": 2, "iterrow": 2, "return": [2, 3, 4], "max_length": [2, 4], "10000": [2, 3, 4], "length": [2, 4], "appl": [2, 3, 4], "sec_fil": [2, 4], "gpt": [2, 3, 4], "5": [2, 3, 4], "turbo": [2, 
3, 4], "singl": [2, 3, 4], "summari": [2, 4], "2": [2, 4], "inc": [2, 3, 4], "its": [2, 3, 4], "10": [2, 3, 4], "k": [2, 3, 4], "fiscal": [2, 3], "end": [2, 3], "septemb": [2, 3], "28": [2, 3], "2024": [2, 3, 4], "detail": [2, 3, 4], "season": 2, "issuer": 2, "california": [2, 4], "manufactur": 2, "smartphon": 2, "person": [2, 4], "tablet": 2, "wearabl": [2, 4], "accessori": 2, "innov": [2, 3], "condit": 2, "secur": [2, 3], "exchang": [2, 3], "commiss": [2, 3], "factor": [2, 3], "invdestacksmeticsisdict": 2, "setispect": 2, "20cyan": 2, "evaluationseld": 2, "anvis": 2, "droitent": 2, "discernminerv": 2, "versbobprefvers": 2, "vo\u8be5": 2, "option\u548c": 2, "meio": 2, "forecast": 2, "\u0432\u0440\u0435\u043ccisco": 2, "dellaischenpoihscap": 2, "geme": 2, "gettim": 2, "comprehens": [2, 3, 4], "simpl": [2, 3], "reveal": 2, "dramat": [2, 4], "alter": 2, "wai": [2, 3, 4], "systemat": 2, "assess": [2, 3], "At": 2, "too": [2, 3], "rigid": 2, "vari": 2, "less": 2, "wildli": 2, "inadequ": 2, "profound": 2, "one": [2, 3, 4], "radic": 2, "reli": [2, 4], "grappl": 2, "probabilist": 2, "lower": [2, 4], "seem": [2, 4], "safer": 2, "don": [2, 3, 4], "elimin": 2, "underli": [2, 4], "uncertainti": 2, "mere": 2, "mask": 2, "highlight": [2, 3, 4], "paradigm": 2, "aspect": [2, 3, 4], "beyond": 2, "present": [2, 3, 4], "anoth": 2, "fascin": 2, "abil": [2, 4], "spontan": 2, "aris": 2, "size": [2, 4], "answer": [2, 3, 4], "reason": [2, 3, 4], "aren": 2, "explicitli": 2, "train": 2, "against": 2, "wtb": 2, "22": 2, "fig": [2, 3, 4], "4": 2, "relationship": 2, "linear": 2, "below": [2, 3], "certain": [2, 3, 4], "threshold": 2, "absent": 2, "simpli": [2, 3, 4], "matter": 2, "much": 2, "coax": 2, "out": [2, 3], "onc": [2, 3], "reach": [2, 3, 4], "point": [2, 3], "journei": 2, "suddenli": 2, "manifest": 2, "call": [2, 3, 4], "phase": 2, "shift": 2, "inabl": 2, "unpredict": [2, 4], "stand": 2, "stark": 2, "contrast": 2, "deliber": 2, "convent": 2, "stabl": 2, "suit": 2, "defin": [2, 3, 4], 
"accept": 2, "criteria": 2, "contend": 2, "constantli": 2, "7b": 2, "70b": 2, "ha": [2, 4], "rethink": 2, "custom": [2, 4], "support": [2, 4], "chatbot": 2, "would": [2, 3, 4], "refund": 2, "request": [2, 3, 4], "track": 2, "verifi": [2, 4], "function": [2, 3, 4], "But": 2, "just": [2, 3, 4], "predefin": [2, 4], "convers": [2, 3, 4], "appropri": [2, 3, 4], "emot": 2, "rais": [2, 3], "measur": 2, "weren": 2, "evolv": [2, 3], "accuraci": 2, "subject": 2, "qualiti": [2, 3, 4], "kind": 2, "account": 2, "sever": [2, 3, 4], "dimens": 2, "necessirali": 2, "pre": 2, "extend": 2, "explicit": [2, 4], "usual": 2, "precis": 2, "involv": [2, 4], "resist": 2, "straightforward": [2, 3], "quantif": 2, "score": [2, 4], "judgment": 2, "inher": [2, 3, 4], "depend": 2, "dataset": 2, "contamin": 2, "carefulli": [2, 4], "craft": [2, 4], "case": [2, 3, 4], "expect": [2, 3, 4], "unit": [2, 3], "massiv": 2, "internet": 2, "alreadi": 2, "seen": 2, "memor": 2, "artifici": 2, "inflat": 2, "curat": 2, "truli": 2, "unseen": 2, "rigor": 2, "cross": 2, "benchmark": 2, "evolut": 2, "continu": [2, 3, 4], "advanc": [2, 3], "longitudin": 2, "comparison": [2, 4], "obsolet": 2, "older": 2, "autom": [2, 4], "demand": 2, "oversight": 2, "bias": [2, 4], "through": [2, 3, 4], "annot": 2, "review": [2, 4], "process": [2, 3, 4], "mostli": 2, "distinct": 2, "versu": 2, "latter": 2, "foundat": [2, 3], "purpos": [2, 4], "tailor": 2, "particular": [2, 4], "combin": [2, 3], "associ": [2, 3], "solv": [2, 4], "That": [2, 4], "differenti": 2, "becaus": 2, "chang": 2, "scope": [2, 3], "includ": [2, 3, 4], "thing": [2, 4], "meet": 2, "close": 2, "ti": 2, "align": [2, 3, 4], "object": [2, 4], "A": [2, 3], "great": [2, 4], "doesn": [2, 3, 4], "observ": [2, 4], "three": 2, "app": 2, "imag": 2, "audio": 2, "etc": [2, 4], "truth": 2, "option": [2, 3, 4], "standard": 2, "layer": [2, 3, 4], "repres": [2, 4], "palm": 2, "individu": [2, 3], "target": [2, 4], "further": [2, 3, 4], "see": [2, 4], "avail": [2, 3, 4], "addition": 
2, "shown": 2, "fix": [2, 3], "default": [2, 4], "quantifi": 2, "rank": 2, "easi": [2, 3], "addit": [2, 3], "quantit": 2, "among": 2, "per": [2, 3], "aggreg": 2, "heavili": 2, "plan": 2, "pertain": 2, "previous": [2, 3], "doe": [2, 3, 4], "cover": [2, 3], "edg": 2, "good": [2, 4], "bia": 2, "separ": [2, 3], "synthet": 2, "updat": [2, 3], "reflect": 2, "post": 2, "fair": 2, "timeout": 2, "variat": 2, "maxim": 2, "valu": [2, 3, 4], "success": 2, "inter": 2, "rater": 2, "scalabl": [2, 3], "weight": 2, "rel": 2, "priorit": 2, "normal": [2, 4], "absolut": [2, 4], "fail": 2, "confid": [2, 4], "interv": 2, "veri": 2, "tier": 2, "hollist": 2, "built": [2, 4], "mind": 2, "x": 2, "fast": 2, "promot": 2, "rapid": 2, "experiment": [2, 4], "iter": [2, 3], "final": [2, 3, 4], "keep": [2, 3], "itself": 2, "confirm": 2, "vi": 2, "categor": 2, "broad": 2, "categori": 2, "intrins": 2, "extrins": 2, "sequenc": 2, "perplex": 2, "given": [2, 3, 4], "variou": [2, 3, 4], "downstream": [2, 4], "directli": [2, 4], "valuabl": [2, 4], "insight": [2, 3, 4], "generalis": 2, "interest": [2, 3, 4], "sinc": [2, 3], "think": 2, "term": [2, 3], "discrimin": 2, "distinguish": 2, "classifi": 2, "sentiment": [2, 4], "classif": [2, 4], "identifi": [2, 3, 4], "whether": [2, 3, 4], "true": [2, 3, 4], "fals": [2, 4], "synthesi": 2, "log": 2, "discret": 2, "recal": 2, "f1": 2, "match": [2, 4], "exact": 2, "prefix": 2, "translat": 2, "charact": [2, 3], "n": [2, 3], "gram": 2, "being": [2, 4], "guidelin": 2, "subsequ": 2, "section": [2, 3, 4], "tabl": [2, 3], "short": [2, 3], "wide": [2, 3, 4], "along": 2, "definit": [2, 4], "bilingu": 2, "understudi": 2, "overlap": [2, 3], "favor": [2, 4], "due": [2, 3], "breviti": 2, "penalti": 2, "insensit": 2, "semant": [2, 3], "high": [2, 3], "orient": 2, "gist": 2, "word": [2, 3, 4], "sentenc": [2, 3, 4], "focus": [2, 3, 4], "ignor": 2, "equival": 2, "influenc": 2, "meteor": 2, "synonym": 2, "stem": 2, "paraphras": 2, "alongsid": 2, "computation": [2, 3], "expens": 2, 
"databas": [2, 4], "cider": 2, "consensu": 2, "descript": [2, 4], "tf": 2, "idf": 2, "caption": 2, "outsid": 2, "reliant": 2, "corpu": 2, "statist": 2, "ter": 2, "edit": 2, "number": [2, 3, 4], "convert": [2, 4], "hypothesi": 2, "correct": [2, 4], "penal": 2, "bertscor": 2, "embed": [2, 3], "bert": 2, "spice": 2, "proposit": 2, "scene": 2, "graph": 2, "emphasi": 2, "onli": [2, 3, 4], "pure": 2, "textual": 2, "want": [2, 3], "extract": [2, 3, 4], "analyst": [2, 3], "prepar": [2, 3], "dictionari": [2, 4], "rouge_1": 2, "rouge_2": 2, "ideal": [2, 4], "humana": 2, "expert": [2, 3, 4], "cheaper": 2, "4o": [2, 3, 4], "mini": [2, 3, 4], "evaluate_summari": 2, "unigram": 2, "bigram": 2, "librari": [2, 3, 4], "absl": 2, "py": 2, "rouge_scor": 2, "generated_summari": 2, "reference_summari": 2, "arg": [2, 3], "dict": [2, 3], "google_bleu": 2, "bleu_scor": 2, "rouge1": 2, "rouge2": 2, "instanc": [2, 3], "arbitrari": 2, "chosen": 2, "sentence1": 2, "cat": 2, "sat": 2, "mat": 2, "sentence2": 2, "ate": 2, "3333333333333333": 2, "7272727272727272": 2, "4444444444444445": 2, "generate_summari": 2, "summir": 2, "correspond": [2, 4], "liner": 2, "excerpt": 2, "evaluate_summary_model": 2, "model_benchmark": 2, "models_test": 2, "benchmark_summari": 2, "model_summari": 2, "evaluation_result": 2, "line": 2, "name": [2, 3, 4], "zip": 2, "annual": 2, "regul": [2, 4], "stock": [2, 4], "corpor": 2, "govern": 2, "forward": 2, "analyz": [2, 3], "statu": 2, "concis": 2, "yet": [2, 3], "omit": [2, 4], "essenti": [2, 3], "element": [2, 4], "Its": 2, "adequ": 2, "verbos": 2, "relev": 2, "introduc": [2, 3, 4], "peripher": 2, "quit": [2, 4], "factual": 2, "overli": [2, 4], "simplifi": [2, 4], "miss": 2, "convei": [2, 3], "breadth": 2, "Of": 2, "cours": 2, "abov": 2, "vibe": 2, "visualize_prompt_comparison": 2, "visual": 2, "matplotlib": 2, "radar": 2, "plot": 2, "radar_plot": 2, "show": [2, 3, 4], "tmp": 2, "ipykernel_1652501": 2, "940173201": 2, "userwarn": 2, "figurecanvasagg": 2, "thu": 2, 
"put": 2, "closest": 2, "largest": 2, "deviat": [2, 4], "suggest": [2, 4], "least": 2, "establish": 2, "otherwis": 2, "qualit": 2, "driven": 2, "might": [2, 3, 4], "nuanc": [2, 3, 4], "especi": [2, 3, 4], "those": [2, 3, 4], "primarili": 2, "level": [2, 3, 4], "granular": [2, 3], "altern": [2, 3], "fall": 2, "judg": 2, "themselv": 2, "act": 2, "fluenci": 2, "interpret": 2, "refin": 2, "centric": 2, "flow": [2, 3], "express": [2, 4], "plain": 2, "ad": 2, "basemodel": [2, 4], "class": [2, 3, 4], "judgeevalu": 2, "expertis": 2, "evaluate_with_llm": 2, "judge_model": 2, "candidate_summari": 2, "candid": 2, "specifi": [2, 3, 4], "wa": [2, 4], "written": 2, "grammat": 2, "y": 2, "z": 2, "w": [2, 3], "beta": [2, 4], "pars": [2, 4], "response_format": [2, 4], "benchmark_model": 2, "test_model": 2, "input_text": [2, 3], "tupl": 2, "iphon": [2, 4], "mac": [2, 4], "ipad": [2, 4], "incorpor": 2, "regard": 2, "regist": 2, "approxim": [2, 4], "6": [2, 3, 4], "trillion": [2, 4], "held": [2, 4], "affili": [2, 4], "billion": 2, "outstand": [2, 4], "octob": [2, 4], "18": [2, 4], "7": [2, 3], "8": [2, 3], "evals_list": 2, "1775618912": 2, "14": [2, 4], "difficulti": 2, "own": [2, 3], "overhead": 2, "infer": 2, "rational": 2, "effici": [2, 3, 4], "replac": 2, "significantli": [2, 3], "workflow": 2, "improv": [2, 3, 4], "domain": 2, "assessor": 2, "resourc": [2, 3], "intens": [2, 4], "standardis": 2, "varieti": 2, "some": [2, 3, 4], "popular": 2, "glue": 2, "collect": [2, 3], "entail": 2, "holist": 2, "view": 2, "superglu": 2, "successor": 2, "push": 2, "boundari": 2, "big": 2, "bench": 2, "collabor": 2, "encompass": 2, "sens": 2, "helm": 2, "flask": 2, "multitask": 2, "mmlu": 2, "57": 2, "social": 2, "humanev": 2, "livebench": 2, "mitig": [2, 3, 4], "releas": [2, 4], "monthli": 2, "leak": 2, "incorrect": 2, "ambigu": 2, "rememb": 2, "jason": 2, "wei": 2, "yi": [2, 4], "tai": 2, "rishi": 2, "bommasani": 2, "colin": 2, "raffel": 2, "barret": 2, "zoph": 2, "sebastian": 2, "borgeaud": 2, 
"dani": 2, "yogatama": 2, "maarten": 2, "bosma": 2, "denni": 2, "zhou": 2, "donald": 2, "metzler": 2, "ed": 2, "h": 2, "chi": 2, "tatsunori": 2, "hashimoto": 2, "oriol": 2, "vinyal": 2, "perci": 2, "liang": 2, "jeff": 2, "dean": 2, "william": 2, "fedu": 2, "2022": 2, "url": [2, 4], "arxiv": [2, 4], "org": [2, 4], "ab": [2, 4], "2206": 2, "07682": 2, "go": [3, 4], "far": 3, "possibli": 3, "find": [3, 4], "eliot": 3, "english": 3, "rule": 3, "thumb": 3, "\u00be": 3, "max_output_token": 3, "modern": 3, "maximum": 3, "4096": 3, "16384": 3, "contrari": 3, "summar": 3, "surpass": 3, "instead": [3, 4], "stop": 3, "mid": 3, "truncat": 3, "max_input_token": 3, "input_cost_per_token": 3, "output_cost_per_token": 3, "meta": 3, "llama3": 3, "11b": 3, "instruct": [3, 4], "v1": 3, "128000": 3, "5e": 3, "sonnet": 3, "20241022": 3, "8192": 3, "200000": 3, "3e": 3, "0613": 3, "6e": 3, "04": 3, "09": 3, "1e": 3, "gemini": 3, "flash": 3, "002": 3, "1048576": 3, "pro": 3, "2097152": 3, "05e": 3, "pose": [3, 4], "challeng": 3, "incomplet": 3, "extens": [3, 4], "articl": 3, "abruptli": 3, "cut": 3, "off": [3, 4], "disrupt": 3, "shallow": 3, "thorough": 3, "receiv": 3, "partial": 3, "dissatisfact": 3, "frustrat": 3, "educ": 3, "creation": 3, "feasibl": 3, "split": 3, "previou": 3, "10k": 3, "sec": [3, 4], "schemat": 3, "represent": 3, "diagram": 3, "charactertextsplitt": 3, "tiktoken": 3, "sequenti": 3, "chain": 3, "newlin": 3, "There": 3, "situat": 3, "broadli": [3, 4], "decid": 3, "sure": 3, "lost": 3, "path": 3, "mani": [3, 4], "cheap": 3, "speciali": 3, "awar": 3, "advantag": [3, 4], "sophist": 3, "naiv": 3, "period": 3, "nltk": 3, "spaci": 3, "recurs": 3, "divid": 3, "hierarch": 3, "manner": [3, 4], "made": [3, 4], "talk": 3, "theme": 3, "topic": [3, 4], "util": 3, "count": 3, "get_chunk": 3, "chunk_siz": 3, "chunk_overlap": 3, "langchain_text_splitt": 3, "text_splitt": 3, "from_tiktoken_encod": 3, "split_text": 3, "serv": [3, 4], "persona": 3, "assum": 3, "task": [3, 4], "action": 
3, "actual": [3, 4], "langchain_cor": [3, 4], "prompttempl": 3, "get_base_prompt_templ": 3, "base_prompt": 3, "from_templ": 3, "llmchain": 3, "togeth": 3, "parser": [3, 4], "output_pars": 3, "stroutputpars": 3, "langchain_commun": 3, "chat_model": 3, "chatlitellm": 3, "get_llm_chain": 3, "prompt_templ": [3, 4], "llm_chain": [3, 4], "api_key_label": 3, "upper": 3, "_api_kei": 3, "api_kei": 3, "get_dynamic_prompt_templ": 3, "get_dynamic_prompt_param": 3, "prompt_param": 3, "part_idx": 3, "total_part": 3, "chat_context": 3, "origin": [3, 4], "part": [3, 4], "total": [3, 4], "param": 3, "dynamic_prompt_param": 3, "copi": 3, "save": 3, "introduct": 3, "elif": 3, "last": [3, 4], "second": 3, "main": [3, 4], "els": 3, "merg": 3, "concaten": 3, "generate_report": 3, "input_cont": 3, "llm_model_nam": 3, "report_part": 3, "num_part": 3, "len": 3, "dinam": 3, "priovid": 3, "enumer": 3, "invok": [3, 4], "cummul": 3, "join": 3, "max_chunk_s": 3, "max_chunk_overlap": 3, "latest": [3, 4], "readabl": 3, "move": 3, "local": [3, 4], "apple_report": 3, "300": 3, "posit": [3, 4], "disclos": 3, "state": 3, "luation": 3, "oblig": 3, "cash": 3, "disciplin": 3, "deeper": 3, "few": [3, 4], "smooth": 3, "upon": 3, "subhead": 3, "clariti": 3, "document": [3, 4], "adher": [3, 4], "revenu": [3, 4], "segment": [3, 4], "liquid": 3, "capit": [3, 4], "inclus": 3, "despit": [3, 4], "depth": 3, "evalu": 3, "overlook": 3, "fit": 3, "within": [3, 4], "meaning": [3, 4], "preserv": 3, "easier": [3, 4], "preprocess": 3, "enhanc": 3, "necessit": 3, "meticul": 3, "retain": 3, "necessari": 3, "seamlessli": 3, "circumv": 3, "therebi": 3, "overal": [3, 4], "escal": 3, "frequenc": 3, "volum": 3, "bottleneck": 3, "latenc": 3, "reduc": 3, "friendli": 3, "mustafa": 3, "suleyman": 3, "infinit": 3, "memori": 3, "amount": [3, 4], "fewer": 3, "compress": 3, "progress": 3, "condens": 3, "adapt": 3, "adjust": [3, 4], "flexibl": [3, 4], "constrain": [3, 4], "versatil": 3, "drive": 3, "grace": 3, "fallback": 3, "empow": 
3, "crucial": [3, 4], "stai": 3, "full": [3, 4], "splitter": 3, "freedom": 4, "thrive": 4, "julia": 4, "cameron": 4, "excel": 4, "easili": 4, "sometim": 4, "unstructur": 4, "notebook": 4, "overrid": 4, "response_cont": 4, "wow": 4, "lot": 4, "breakdown": 4, "stream": 4, "portfolio": 4, "impress": 4, "trend": 4, "notic": 4, "march": 4, "29": 4, "huge": 4, "investor": 4, "figur": 4, "compli": 4, "ye": 4, "accur": 4, "date": 4, "transpar": 4, "industri": 4, "serious": 4, "is_json": 4, "myjson": 4, "except": 4, "valueerror": 4, "clearli": 4, "obtain": 4, "lack": 4, "emploi": 4, "schema": 4, "guidanc": 4, "blueprint": 4, "achiev": 4, "nativ": 4, "regular": 4, "dedic": 4, "enforc": 4, "json_format": 4, "person1": 4, "alic": 4, "q1": 4, "20": 4, "person2": 4, "bob": 4, "net": 4, "margin": 4, "materi": 4, "though": 4, "suffici": 4, "nest": 4, "restrict": 4, "todai": 4, "programmat": 4, "depict": 4, "thellm": 4, "via": 4, "unend": 4, "whitespac": 4, "until": 4, "forget": 4, "throw": 4, "string": 4, "appear": 4, "somewher": 4, "json_object": 4, "628": 4, "553": 4, "000": 4, "sheer": 4, "mention": 4, "115": 4, "823": 4, "circul": 4, "plai": 4, "googl": 4, "vertex": 4, "suppli": 4, "so": 4, "worri": 4, "enum": 4, "benefit": 4, "No": 4, "incorrectli": 4, "refus": 4, "simpler": 4, "strongli": 4, "entiti": 4, "ii": 4, "place": 4, "doc": 4, "07": 4, "08": 4, "06": 4, "later": 4, "secextract": 4, "mentioned_ent": 4, "mentioned_plac": 4, "extract_from_sec_fil": 4, "sec_filing_text": 4, "explan": 4, "hint": 4, "send": 4, "attribut": 4, "conform": 4, "prompt_extract": 4, "sec_extract": 4, "nasdaq": 4, "llc": 4, "washington": 4, "c": 4, "cupertino": 4, "usabl": 4, "beg": 4, "abstract": 4, "with_structured_output": 4, "runnabl": 4, "typeddict": 4, "qu": 4, "langchain_openai": 4, "chatopenai": 4, "chatprompttempl": 4, "extract_from_sec_filing_langchain": 4, "structured_llm": 4, "from_messag": 4, "sec_extraction_langchain": 4, "found": 4, "out24": 4, "under": 4, "hood": 4, "logit": 4, 
"raw": 4, "neural": 4, "network": 4, "prefer": 4, "fine": 4, "grain": 4, "regex": 4, "proprietari": 4, "vllm": 4, "qwen2": 4, "5b": 4, "lightweight": 4, "alibaba": 4, "cloud": 4, "strong": 4, "small": 4, "enough": 4, "hug": 4, "qwen": 4, "top": 4, "100": 4, "label": 4, "assist": 4, "neg": 4, "unexpect": 4, "malform": 4, "back": 4, "pass": 4, "modul": 4, "sec_extraction_outlin": 4, "zsp": 4, "zicorp": 4, "phenomenon": 4, "were": 4, "tune": 4, "simplic": 4, "v": 4, "greater": 4, "steeper": 4, "curv": 4, "wrapper": 4, "wider": 4, "structuredoutputpars": 4, "done": 4, "know": 4, "exactli": 4, "field": 4, "chanc": 4, "connect": 4, "highli": 4, "encourag": 4, "correctli": 4, "xml": 4, "area": 4, "me": 4, "speak": 4, "freeli": 4, "studi": 4, "twt": 4, "24": 4, "impos": 4, "evid": 4, "aid24": 4, "degrad": 4, "decod": 4, "multi": 4, "thought": 4, "strict": 4, "hinder": 4, "outweigh": 4, "team": 4, "rebutt": 4, "argu": 4, "reproduct": 4, "paper": 4, "paint": 4, "pictur": 4, "publicli": 4, "independ": 4, "verif": 4, "dot24": 4, "flaw": 4, "believ": 4, "led": 4, "inaccur": 4, "reconcil": 4, "uneven": 4, "didn": 4, "properli": 4, "conflat": 4, "argument": 4, "trade": 4, "surround": 4, "drawback": 4, "unlock": 4, "thank": 4, "pfiffer": 4, "hi": 4, "feedback": 4, "aider": 4, "html": 4, "dottxt": 4, "sai": 4, "demo": 4, "tree": 4, "safe": 4, "io": 4, "zhi": 4, "rui": 4, "tam": 4, "cheng": 4, "kuang": 4, "wu": 4, "lin": 4, "tsai": 4, "chieh": 4, "yen": 4, "hung": 4, "lee": 4, "yun": 4, "nung": 4, "chen": 4, "2408": 4, "02442": 4}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"introduct": [0, 1], "content": [0, 2, 3, 4], "core": 0, "challeng": [0, 1, 2, 4], "we": 0, "ll": 0, "address": 0, "A": [0, 1, 4], "practic": [0, 4], "approach": 0, "note": 0, "perspect": 0, "who": 0, "thi": 0, "book": 0, "i": 0, "For": 0, "outcom": 0, "prerequisit": 0, "set": 0, "up": 0, "your": 0, "environ": 0, "python": 0, "setup": 0, "api": [0, 4], "kei": [0, 2, 3], "configur": 0, "code": 
0, "repositori": 0, "troubleshoot": 0, "common": 0, "issu": 0, "about": 0, "author": 0, "": 0, "tame": 1, "larg": 1, "languag": 1, "model": [1, 2, 3, 4], "open": 1, "sourc": 1, "softwar": [1, 2], "chapter": 1, "1": [1, 3], "2": [1, 3], "wrestl": [1, 4], "structur": [1, 4], "output": [1, 3, 4], "3": [1, 3], "input": 1, "size": [1, 3], "length": [1, 3], "limit": [1, 3], "4": [1, 3], "5": 1, "evalu": [1, 2], "llm": [1, 2, 4], "base": [1, 2, 3], "applic": [1, 2], "6": 1, "hallucin": 1, "The": [1, 2, 4], "realiti": 1, "gap": 1, "7": 1, "safeti": 1, "concern": 1, "8": 1, "cost": [1, 3], "factor": 1, "9": 1, "break": 1, "free": 1, "from": 1, "cloud": 1, "provid": [1, 4], "appendix": 1, "tool": [1, 2, 4], "resourc": 1, "non": 2, "determinist": 2, "machin": 2, "temperatur": 2, "sampl": 2, "spectrum": 2, "emerg": 2, "properti": 2, "problem": [2, 3, 4], "statement": [2, 3, 4], "eval": 2, "tradit": 2, "v": 2, "design": 2, "conceptu": 2, "overview": 2, "consider": [2, 3], "compon": 2, "metric": 2, "gener": [2, 3, 4], "task": 2, "exampl": [2, 3, 4], "bleu": 2, "roug": 2, "sec": 2, "file": 2, "summar": 2, "human": 2, "leaderboard": 2, "refer": [2, 3, 4], "what": 3, "ar": 3, "token": 3, "comparison": 3, "across": 3, "chunk": 3, "contextu": 3, "link": 3, "long": 3, "form": 3, "step": 3, "write": 3, "prompt": [3, 4], "templat": 3, "construct": 3, "dynam": 3, "paramet": 3, "report": 3, "usag": 3, "discuss": [3, 4], "implic": 3, "futur": 3, "conclus": [3, 4], "solut": 4, "strategi": 4, "techniqu": 4, "One": 4, "shot": 4, "specif": 4, "json": 4, "mode": 4, "langchain": 4, "outlin": 4, "simpl": 4, "multipl": 4, "choic": 4, "pydant": 4, "compar": 4, "best": 4, "ongo": 4, "debat": 4, "acknowledg": 4}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, 
"sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 57}, "alltitles": {"Introduction": [[0, "introduction"]], "Contents": [[0, "contents"], [2, "contents"], [3, "contents"], [4, "contents"]], "Core Challenges We\u2019ll Address": [[0, "core-challenges-we-ll-address"]], "A Practical Approach": [[0, "a-practical-approach"]], "A Note on Perspective": [[0, "a-note-on-perspective"]], "Who This Book Is For": [[0, "who-this-book-is-for"]], "Outcomes": [[0, "outcomes"]], "Prerequisites": [[0, "prerequisites"]], "Setting Up Your Environment": [[0, "setting-up-your-environment"]], "Python Environment Setup": [[0, "python-environment-setup"]], "API Keys Configuration": [[0, "api-keys-configuration"]], "Code Repository": [[0, "code-repository"]], "Troubleshooting Common Issues": [[0, "troubleshooting-common-issues"]], "About the Author(s)": [[0, "about-the-author-s"]], "Taming Large Language Models with Open Source Software": [[1, "taming-large-language-models-with-open-source-software"]], "Chapter 1: Introduction": [[1, "chapter-1-introduction"]], "Chapter 2: Wrestling with Structured Output": [[1, "chapter-2-wrestling-with-structured-output"]], "Chapter 3: Input Size and Length Limitations": [[1, "chapter-3-input-size-and-length-limitations"]], "Chapter 4: Output Size and Length Limitations": [[1, "chapter-4-output-size-and-length-limitations"]], "Chapter 5: Challenges of Evaluating LLM-based Applications": [[1, "chapter-5-challenges-of-evaluating-llm-based-applications"]], "Chapter 6: Hallucination: The Reality Gap": [[1, "chapter-6-hallucination-the-reality-gap"]], "Chapter 7: Safety Concerns": [[1, "chapter-7-safety-concerns"]], "Chapter 8: The Cost Factor": [[1, "chapter-8-the-cost-factor"]], "Chapter 9: Breaking Free from Cloud Providers": [[1, "chapter-9-breaking-free-from-cloud-providers"]], "Appendix A: Tools and Resources": [[1, "appendix-a-tools-and-resources"]], "Challenges of Evaluating LLM-based Applications": [[2, 
"challenges-of-evaluating-llm-based-applications"]], "Non-Deterministic Machines": [[2, "non-deterministic-machines"]], "Temperature and Sampling": [[2, "temperature-and-sampling"]], "The Temperature Spectrum": [[2, "the-temperature-spectrum"]], "Emerging Properties": [[2, "emerging-properties"]], "Problem Statement": [[2, "problem-statement"], [3, "problem-statement"], [4, "problem-statement"]], "Evals of Traditional Software vs LLMs": [[2, "evals-table"]], "Evals Design": [[2, "evals-design"]], "Conceptual Overview": [[2, "conceptual-overview"]], "Design Considerations": [[2, "design-considerations"]], "Key Components": [[2, "key-components"]], "Metrics": [[2, "metrics"]], "Key Metrics for Evaluating Generative Tasks": [[2, "key-metrics"]], "Example: BLEU and ROUGE for SEC Filing Summarization": [[2, "example-bleu-and-rouge-for-sec-filing-summarization"]], "Considerations": [[2, "considerations"]], "Evaluators": [[2, "evaluators"]], "Model-Based Evaluation": [[2, "model-based-evaluation"]], "Human-Based Evaluation": [[2, "human-based-evaluation"]], "Leaderboard": [[2, "leaderboard"]], "Tools": [[2, "tools"]], "References": [[2, "references"], [3, "references"], [4, "references"]], "Output Size Limitations": [[3, "output-size-limitations"]], "What are Token Limits?": [[3, "what-are-token-limits"]], "Token Cost and Length Limitation Comparison Across Key Models": [[3, "token-cost-table"]], "Content Chunking with Contextual Linking": [[3, "content-chunking-with-contextual-linking"]], "Generating long-form content": [[3, "generating-long-form-content"]], "Step 1: Chunking the Content": [[3, "step-1-chunking-the-content"]], "Step 2: Writing the Base Prompt Template": [[3, "step-2-writing-the-base-prompt-template"]], "Step 3: Constructing Dynamic Prompt Parameters": [[3, "step-3-constructing-dynamic-prompt-parameters"]], "Step 4: Generating the Report": [[3, "step-4-generating-the-report"]], "Example Usage": [[3, "example-usage"]], "Discussion": [[3, "discussion"], [4, 
"discussion"]], "Implications": [[3, "implications"]], "Future Considerations": [[3, "future-considerations"]], "Conclusion": [[3, "conclusion"], [4, "conclusion"]], "Wrestling with Structured Output": [[4, "wrestling-with-structured-output"]], "The Structured Output Challenges": [[4, "the-structured-output-challenges"]], "Solutions": [[4, "solutions"]], "Strategies": [[4, "strategies"]], "Techniques and Tools": [[4, "techniques-and-tools"]], "One-Shot Prompts": [[4, "one-shot-prompts"]], "Structured Output with Provider-Specific APIs": [[4, "structured-output-with-provider-specific-apis"]], "JSON Mode": [[4, "json-mode"]], "LangChain": [[4, "langchain"]], "Outlines": [[4, "outlines"]], "A Simple Example: Multiple Choice Generation": [[4, "a-simple-example-multiple-choice-generation"]], "Pydantic model": [[4, "pydantic-model"]], "Comparing Solutions": [[4, "comparing-solutions"]], "Best Practices": [[4, "best-practices"]], "Ongoing Debate on LLMs Structured Output": [[4, "ongoing-debate-on-llms-structured-output"]], "Acknowledgements": [[4, "acknowledgements"]]}, "indexentries": {}}) \ No newline at end of file +Search.setIndex({"docnames": ["markdown/intro", "markdown/toc", "notebooks/evals", "notebooks/output_size_limit", "notebooks/structured_output"], "filenames": ["markdown/intro.md", "markdown/toc.md", "notebooks/evals.ipynb", "notebooks/output_size_limit.ipynb", "notebooks/structured_output.ipynb"], "titles": ["1. Introduction", "Taming Large Language Models with Open Source Software", "4. Challenges of Evaluating LLM-based Applications", "2. Output Size Limitations", "3. 
Wrestling with Structured Output"], "terms": {"am": 0, "alwai": [0, 2, 4], "do": [0, 2, 3, 4], "which": [0, 2, 3, 4], "cannot": [0, 2], "order": [0, 2, 4], "mai": [0, 2, 3, 4], "learn": [0, 2, 4], "how": [0, 2, 3, 4], "pablo": 0, "picasso": 0, "In": [0, 2, 3, 4], "recent": [0, 4], "year": [0, 2, 3, 4], "larg": [0, 2, 3, 4], "languag": [0, 2, 3, 4], "model": 0, "llm": [0, 3], "have": [0, 2, 3, 4], "emerg": [0, 1], "transform": [0, 2, 4], "forc": [0, 2], "technologi": [0, 2, 3, 4], "promis": 0, "revolution": 0, "build": [0, 1, 2, 3], "product": [0, 2, 4], "interact": [0, 2, 3, 4], "comput": [0, 2, 3], "from": [0, 2, 3, 4], "chatgpt": [0, 2], "github": [0, 4], "copilot": 0, "claud": [0, 2, 3], "artifact": 0, "cursor": 0, "com": [0, 4], "replit": 0, "other": [0, 2, 3, 4], "system": [0, 2, 3, 4], "captur": [0, 2], "public": 0, "imagin": 0, "spark": 0, "gold": 0, "rush": 0, "ai": [0, 4], "power": [0, 2, 3, 4], "applic": [0, 3, 4], "howev": [0, 2, 3, 4], "beneath": 0, "surfac": [0, 2], "technolog": 0, "revolut": 0, "li": 0, "complex": [0, 2, 3, 4], "landscap": [0, 2], "practition": 0, "must": [0, 2, 3], "navig": 0, "As": [0, 2, 3, 4], "explor": [0, 2, 4], "engin": [0, 2, 4], "effort": [0, 4], "requir": [0, 2, 3, 4], "manag": [0, 1, 2, 3, 4], "handl": [0, 1, 2, 3, 4], "non": [0, 1, 4], "determinist": [0, 1], "output": [0, 2], "prevent": [0, 2, 4], "hallucin": [0, 4], "overst": 0, "while": [0, 2, 3, 4], "potenti": [0, 2, 3, 4], "remain": [0, 2, 3], "compel": [0, 4], "understand": [0, 1, 2, 3, 4], "hidden": 0, "cost": [0, 2, 4], "reliabl": [0, 2, 4], "enabl": [0, 2, 3, 4], "u": [0, 2, 4], "fulli": [0, 2, 3], "har": [0, 3], "impact": [0, 2, 3, 4], "fortun": 0, "grow": [0, 2, 4], "ecosystem": 0, "open": [0, 2, 3, 4], "sourc": [0, 2, 4], "solut": [0, 1, 2, 3], "best": [0, 1, 2], "help": [0, 2, 3, 4], "tackl": 0, "limit": [0, 2, 4], "rang": [0, 2, 3, 4], "famili": 0, "llama": [0, 1], "mistral": 0, "framework": [0, 2, 4], "ollama": [0, 1], "langchain": [0, 1, 3], "packag": 0, 
"outlin": [0, 1, 2], "promptfoo": 0, "format": [0, 2, 3, 4], "llamafil": [0, 1], "txt": [0, 2, 3, 4], "hub": 0, "like": [0, 2, 3, 4], "huggingfac": [0, 2], "equip": 0, "battl": 0, "test": [0, 1, 2], "tool": [0, 3], "overcom": [0, 3], "develop": [0, 2, 3, 4], "launch": [0, 2], "robust": [0, 2, 3, 4], "base": [0, 4], "capabl": [0, 2, 3, 4], "ar": [0, 1, 2, 4], "inde": 0, "remark": 0, "prevail": 0, "narr": 0, "often": [0, 2, 3, 4], "gloss": 0, "over": [0, 2, 3, 4], "fundament": [0, 2], "problem": [0, 1], "organ": [0, 2, 3], "face": [0, 4], "when": [0, 2, 3, 4], "real": [0, 2, 3, 4], "world": [0, 2, 4], "aim": [0, 3, 4], "bridg": 0, "gap": 0, "offer": [0, 2, 4], "clear": [0, 2, 4], "ei": 0, "examin": [0, 3], "pitfal": [0, 1], "work": [0, 1, 2, 3, 4], "provid": [0, 2, 3], "them": [0, 2, 3, 4], "throughout": [0, 3, 4], "follow": [0, 2, 3, 4], "exhaust": 0, "list": [0, 2, 3, 4], "critic": [0, 2, 3, 4], "behavior": [0, 2], "unlik": [0, 2], "tradit": 0, "softwar": [0, 4], "can": [0, 2, 3, 4], "produc": [0, 2, 4], "differ": [0, 2, 3, 4], "ident": [0, 2], "input": [0, 2, 3, 4], "make": [0, 2, 3, 4], "assur": 0, "particularli": [0, 2, 3, 4], "structur": [0, 2, 3], "un": 0, "struggl": [0, 4], "maintain": [0, 2, 3, 4], "consist": [0, 2, 3, 4], "complic": 0, "integr": [0, 2, 4], "larger": [0, 2, 3], "error": [0, 4], "more": [0, 2, 3, 4], "These": [0, 2, 3], "gener": [0, 1], "plausibl": 0, "sound": 0, "entir": [0, 2, 3], "fabric": 0, "inform": [0, 2, 3, 4], "creat": [0, 2, 3, 4], "signific": [0, 2, 3, 4], "risk": [0, 2, 3], "optim": [0, 1, 3], "The": [0, 3], "financi": [0, 2, 3, 4], "oper": [0, 2, 3], "quickli": [0, 3], "becom": [0, 2], "prohibit": 0, "without": [0, 2, 3, 4], "care": [0, 2, 4], "methodologi": [0, 4], "break": [0, 2, 3], "down": [0, 2, 3], "deal": 0, "new": [0, 2, 3, 4], "take": [0, 2, 3, 4], "hand": [0, 3, 4], "concret": 0, "exampl": [0, 1], "you": [0, 2, 3, 4], "run": [0, 2, 4], "modifi": 0, "scenario": [0, 2], "strategi": [0, 1, 2, 3], "techniqu": [0, 1, 2, 3], 
"pattern": [0, 1, 2, 4], "anti": 0, "look": [0, 2], "our": [0, 2, 3, 4], "goal": [0, 3], "discourag": 0, "us": [0, 2, 3, 4], "implement": [0, 1, 2, 3, 4], "By": [0, 3, 4], "upfront": 0, "better": [0, 3], "leverag": [0, 2, 3, 4], "effect": [0, 2, 3, 4], "avoid": [0, 2, 4], "current": [0, 3], "discours": 0, "around": [0, 3, 4], "tend": [0, 2], "toward": [0, 2], "extrem": 0, "either": [0, 3], "uncrit": 0, "enthusiasm": 0, "wholesal": 0, "dismiss": 0, "focu": [0, 2, 3], "rather": [0, 2], "than": [0, 2], "theoret": 0, "first": [0, 2, 3], "everi": 0, "concept": 0, "illustr": [0, 2, 3], "execut": [0, 2], "immedi": 0, "analysi": [0, 1, 2, 3], "balanc": [0, 2, 3, 4], "both": [0, 2], "reader": 0, "decis": [0, 4], "inted": 0, "step": [0, 1, 2, 4], "It": [0, 2, 3, 4], "insight": [0, 2, 3, 4], "along": [0, 2], "guidanc": [0, 4], "could": [0, 2, 3], "derail": 0, "project": 0, "earli": 0, "befor": 0, "thei": [0, 2, 3, 4], "costli": 0, "too": [0, 2, 3], "late": 0, "lifecycl": 0, "broader": 0, "audienc": 0, "includ": [0, 2, 3, 4], "technic": [0, 2, 3], "lead": [0, 2, 3, 4], "initi": [0, 2, 3], "leader": 0, "architectur": [0, 3], "anyon": 0, "seek": 0, "typic": [0, 2, 3], "job": 0, "role": [0, 2, 3, 4], "platform": [0, 3], "backend": 0, "exist": [0, 2], "ml": 0, "transit": [0, 2, 3], "overse": 0, "genai": 0, "motiv": 0, "need": [0, 2, 3, 4], "readi": [0, 2], "desir": [0, 2, 4], "perform": [0, 1, 2, 3, 4], "ensur": [0, 2, 3, 4], "safeti": [0, 4], "after": [0, 3], "read": [0, 2, 3, 4], "implic": [0, 1, 2], "experi": [0, 2, 3, 4], "recommend": [0, 3, 4], "abl": [0, 3, 4], "deploi": [0, 3], "proper": [0, 4], "safeguard": 0, "realist": 0, "estim": 0, "timelin": 0, "To": [0, 2, 3, 4], "most": [0, 2, 3, 4], "should": [0, 2, 3, 4], "basic": [0, 2, 3], "program": [0, 2], "knowledg": [0, 2], "introductori": 0, "e": [0, 2, 3, 4], "g": [0, 2, 3, 4], "chat": [0, 2, 3, 4], "prompt": [0, 1, 2], "templat": [0, 1, 2], "access": [0, 4], "openai": [0, 2, 4], "anthrop": [0, 4], "similar": [0, 2, 4], 
"grade": 0, "dive": 0, "here": [0, 2, 3, 4], "get": [0, 2, 3, 4], "start": 0, "activ": [0, 2], "virtual": 0, "m": 0, "venv": 0, "env": [0, 2, 3, 4], "bin": 0, "On": [0, 4], "window": [0, 1], "script": 0, "instal": [0, 2, 4], "pip": [0, 2, 4], "r": [0, 2, 3, 4], "file": [0, 3, 4], "root": 0, "directori": 0, "add": [0, 3], "sensit": [0, 2], "openai_api_kei": 0, "your_openai_api_key_her": 0, "never": 0, "share": [0, 2, 4], "commit": 0, "version": [0, 2, 4], "control": [0, 2, 4], "contain": [0, 2, 3], "kept": 0, "privat": 0, "clone": 0, "companion": 0, "git": 0, "http": [0, 2, 4], "souzatharsi": 0, "tamingllm": 0, "cd": 0, "If": [0, 2, 4], "encount": 0, "rate": [0, 2], "consid": [0, 2, 3, 4], "smaller": [0, 2, 3, 4], "retri": [0, 4], "logic": [0, 2, 3], "conflict": 0, "try": [0, 2, 4], "fresh": 0, "poetri": 0, "check": [0, 2], "page": 0, "known": [0, 2, 4], "now": [0, 2, 3, 4], "let": [0, 2, 3, 4], "begin": 0, "dr": 0, "tharsi": 0, "souza": 0, "scientist": 0, "special": [0, 2, 4], "he": 0, "lectur": 0, "columbia": 0, "univers": 0, "master": 0, "scienc": [0, 2], "appli": [0, 2, 3], "analyt": 0, "head": [0, 3], "equiti": 0, "citadel": 0, "former": [0, 2], "senior": 0, "vp": 0, "two": [0, 2, 3, 4], "sigma": 0, "invest": [0, 2, 4], "With": 0, "15": [0, 2, 4], "deliv": 0, "across": [0, 2], "startup": 0, "500": 0, "compani": [0, 2, 3, 4], "global": 0, "also": [0, 2, 3, 4], "an": [0, 2, 3, 4], "numer": [0, 2], "scholarli": 0, "frequent": 0, "speaker": 0, "academ": 0, "busi": [0, 2], "confer": 0, "ground": [0, 1, 2], "background": [0, 3], "draw": 0, "scale": [0, 2, 4], "stage": 0, "major": [0, 4], "institut": 0, "well": [0, 2, 4], "advis": 0, "profit": [0, 3, 4], "contribut": [0, 3], "bring": 0, "uniqu": [0, 2], "between": [0, 2, 3], "next": [0, 2, 4], "hold": 0, "ph": 0, "d": [0, 4], "ucl": 0, "london": 0, "phil": 0, "sc": 0, "b": [0, 4], "practic": [1, 2, 3], "guid": [1, 2, 4], "python": [1, 4], "core": [1, 2], "we": [1, 2, 3, 4], "ll": [1, 2], "address": [1, 2, 3, 4], 
"approach": [1, 2, 3, 4], "note": [1, 2, 3, 4], "perspect": 1, "who": [1, 3], "thi": [1, 2, 3, 4], "book": 1, "i": [1, 2, 3, 4], "For": [1, 2, 3, 4], "outcom": [1, 2], "prerequisit": 1, "set": [1, 2, 3, 4], "up": [1, 2, 3, 4], "your": [1, 3, 4], "environ": [1, 2, 3, 4], "setup": [1, 2], "api": [1, 2], "kei": [1, 4], "configur": [1, 2], "code": [1, 2, 4], "repositori": 1, "troubleshoot": 1, "common": [1, 2, 3, 4], "issu": [1, 2, 3, 4], "about": [1, 2, 3, 4], "author": 1, "": [1, 2, 3, 4], "statement": 1, "One": [1, 2], "shot": 1, "specif": [1, 2, 3], "json": 1, "mode": 1, "multipl": [1, 2, 3], "choic": [1, 2], "pydant": [1, 2], "discuss": [1, 2], "compar": [1, 2, 3], "research": [1, 2, 3, 4], "ongo": 1, "debat": 1, "conclus": [1, 2], "acknowledg": 1, "refer": 1, "context": [1, 2, 3, 4], "constraint": [1, 3], "long": [1, 2], "token": [1, 2, 4], "chunk": 1, "form": [1, 2, 4], "content": 1, "what": [1, 2, 4], "contextu": [1, 2], "link": 1, "write": [1, 2], "construct": 1, "dynam": [1, 2], "paramet": [1, 2, 4], "report": [1, 2, 4], "usag": [1, 2], "futur": [1, 2], "consider": [1, 4], "machin": 1, "temperatur": [1, 3], "sampl": [1, 3, 4], "spectrum": 1, "properti": 1, "eval": 1, "design": [1, 3, 4], "conceptu": [1, 4], "overview": [1, 4], "compon": 1, "metric": 1, "human": [1, 3, 4], "leaderboard": 1, "type": [1, 2, 3, 4], "detect": [1, 4], "retriev": 1, "augment": [1, 2], "rag": 1, "select": [1, 2], "index": [1, 3], "vector": 1, "store": [1, 3], "method": [1, 2, 3, 4], "pipelin": 1, "valid": [1, 2, 4], "guard": 1, "filter": 1, "sanit": 1, "monitor": 1, "alert": 1, "cach": 1, "invalid": [1, 4], "predict": [1, 2, 4], "vendor": [1, 2], "lock": 1, "self": 1, "host": 1, "deploy": 1, "migrat": 1, "commun": 1, "surprisingli": 2, "all": [2, 3, 4], "greg": 2, "brockman": 2, "presid": 2, "natur": [2, 3, 4], "where": [2, 3], "same": [2, 3, 4], "respons": [2, 3, 4], "each": [2, 3], "time": [2, 3, 4], "re": [2, 3, 4], "queri": 2, "even": [2, 3, 4], "data": [2, 3, 4], "characterist": 
2, "strength": 2, "ask": [2, 4], "ani": [2, 3, 4], "question": [2, 4], "isn": 2, "t": [2, 3, 4], "bug": 2, "featur": [2, 4], "random": 2, "allow": [2, 3, 4], "creativ": [2, 4], "divers": [2, 3, 4], "incredibli": 2, "difficult": 2, "testabl": 2, "servic": [2, 3, 4], "advic": 2, "mean": [2, 3, 4], "market": [2, 3, 4], "yield": 2, "exceedingli": 2, "regulatori": 2, "complianc": [2, 4], "guarante": [2, 4], "user": [2, 3, 4], "trust": 2, "affect": 2, "inconsist": [2, 4], "primari": 2, "determin": [2, 3, 4], "come": [2, 3, 4], "dure": 2, "text": [2, 3, 4], "calcul": 2, "probabl": [2, 4], "distribut": [2, 4], "nucleu": 2, "coher": [2, 3], "0": [2, 3, 4], "repetit": [2, 3], "1": [2, 4], "increas": [2, 3, 4], "incoher": 2, "dotenv": [2, 3, 4], "import": [2, 3, 4], "load_dotenv": [2, 3, 4], "o": [2, 3, 4], "load": [2, 3, 4], "variabl": [2, 3, 4], "panda": 2, "pd": 2, "def": [2, 3, 4], "generate_respons": 2, "model_nam": [2, 3], "str": [2, 3, 4], "float": [2, 3], "attempt": [2, 3], "int": [2, 3], "3": [2, 4], "datafram": 2, "demonstr": [2, 3, 4], "client": [2, 4], "result": [2, 3, 4], "temp": 2, "complet": [2, 3, 4], "messag": [2, 4], "max_token": 2, "50": 2, "append": [2, 3], "displai": 2, "group": [2, 3], "df_result": 2, "print": [2, 3, 4], "f": [2, 3, 4], "ntemperatur": 2, "40": 2, "temp_respons": 2, "_": 2, "row": 2, "iterrow": 2, "return": [2, 3, 4], "max_length": [2, 4], "10000": [2, 3, 4], "length": [2, 4], "appl": [2, 3, 4], "sec_fil": [2, 4], "gpt": [2, 3, 4], "5": [2, 3, 4], "turbo": [2, 3, 4], "singl": [2, 3, 4], "summari": [2, 4], "2": [2, 4], "inc": [2, 3, 4], "its": [2, 3, 4], "10": [2, 3, 4], "k": [2, 3, 4], "fiscal": [2, 3], "end": [2, 3], "septemb": [2, 3], "28": [2, 3], "2024": [2, 3, 4], "detail": [2, 3, 4], "season": 2, "issuer": 2, "california": [2, 4], "manufactur": 2, "smartphon": 2, "person": [2, 4], "tablet": 2, "wearabl": [2, 4], "accessori": 2, "innov": [2, 3], "condit": 2, "secur": [2, 3], "exchang": [2, 3], "commiss": [2, 3], "factor": [2, 3], 
"invdestacksmeticsisdict": 2, "setispect": 2, "20cyan": 2, "evaluationseld": 2, "anvis": 2, "droitent": 2, "discernminerv": 2, "versbobprefvers": 2, "vo\u8be5": 2, "option\u548c": 2, "meio": 2, "forecast": 2, "\u0432\u0440\u0435\u043ccisco": 2, "dellaischenpoihscap": 2, "geme": 2, "gettim": 2, "comprehens": [2, 3, 4], "simpl": [2, 3], "reveal": 2, "dramat": [2, 4], "alter": 2, "wai": [2, 3, 4], "systemat": 2, "assess": [2, 3], "At": 2, "rigid": 2, "vari": 2, "less": 2, "wildli": 2, "inadequ": 2, "profound": 2, "one": [2, 3, 4], "radic": 2, "reli": [2, 4], "grappl": 2, "probabilist": 2, "lower": [2, 4], "seem": [2, 4], "safer": 2, "don": [2, 3, 4], "elimin": 2, "underli": [2, 4], "uncertainti": 2, "mere": 2, "mask": 2, "highlight": [2, 3, 4], "paradigm": 2, "aspect": [2, 3, 4], "beyond": 2, "present": [2, 3, 4], "anoth": 2, "fascin": 2, "abil": [2, 4], "spontan": 2, "aris": 2, "size": [2, 4], "answer": [2, 3, 4], "reason": [2, 3, 4], "aren": 2, "explicitli": 2, "train": 2, "against": 2, "wtb": 2, "22": 2, "fig": [2, 3, 4], "4": 2, "relationship": 2, "linear": 2, "below": [2, 3], "certain": [2, 3, 4], "threshold": 2, "absent": 2, "simpli": [2, 3, 4], "matter": 2, "much": 2, "coax": 2, "out": [2, 3], "onc": [2, 3], "reach": [2, 3, 4], "point": [2, 3], "journei": 2, "suddenli": 2, "manifest": 2, "call": [2, 3, 4], "phase": 2, "shift": 2, "inabl": 2, "unpredict": [2, 4], "stand": 2, "stark": 2, "contrast": 2, "deliber": 2, "convent": 2, "stabl": 2, "suit": 2, "defin": [2, 3, 4], "accept": 2, "criteria": 2, "contend": 2, "constantli": 2, "7b": 2, "70b": 2, "ha": [2, 4], "rethink": 2, "custom": [2, 4], "support": [2, 4], "chatbot": 2, "would": [2, 3, 4], "refund": 2, "request": [2, 3, 4], "track": 2, "verifi": [2, 4], "function": [2, 3, 4], "But": 2, "just": [2, 3, 4], "predefin": [2, 4], "convers": [2, 3, 4], "appropri": [2, 3, 4], "emot": 2, "rais": [2, 3], "measur": 2, "weren": 2, "evolv": [2, 3], "accuraci": 2, "subject": 2, "qualiti": [2, 3, 4], "kind": 2, "account": 
2, "sever": [2, 3, 4], "dimens": 2, "necessirali": 2, "pre": 2, "extend": 2, "explicit": [2, 4], "usual": 2, "precis": 2, "involv": [2, 4], "resist": 2, "straightforward": [2, 3], "quantif": 2, "score": [2, 4], "judgment": 2, "inher": [2, 3, 4], "depend": 2, "dataset": 2, "contamin": 2, "carefulli": [2, 4], "craft": [2, 4], "case": [2, 3, 4], "expect": [2, 3, 4], "unit": [2, 3], "massiv": 2, "internet": 2, "alreadi": 2, "seen": 2, "memor": 2, "artifici": 2, "inflat": 2, "curat": 2, "truli": 2, "unseen": 2, "rigor": 2, "cross": 2, "benchmark": 2, "evolut": 2, "continu": [2, 3, 4], "advanc": [2, 3], "longitudin": 2, "comparison": [2, 4], "obsolet": 2, "older": 2, "autom": [2, 4], "demand": 2, "oversight": 2, "bias": [2, 4], "through": [2, 3, 4], "annot": 2, "review": [2, 4], "process": [2, 3, 4], "mostli": 2, "distinct": 2, "versu": 2, "latter": 2, "foundat": [2, 3], "purpos": [2, 4], "tailor": 2, "particular": [2, 4], "combin": [2, 3], "associ": [2, 3], "solv": [2, 4], "That": [2, 4], "differenti": 2, "becaus": 2, "chang": 2, "scope": [2, 3], "thing": [2, 4], "meet": 2, "close": 2, "ti": 2, "align": [2, 3, 4], "object": [2, 4], "A": [2, 3], "great": [2, 4], "doesn": [2, 3, 4], "observ": [2, 4], "three": 2, "app": 2, "imag": 2, "audio": 2, "etc": [2, 4], "truth": 2, "option": [2, 3, 4], "standard": 2, "layer": [2, 3, 4], "repres": [2, 4], "palm": 2, "individu": [2, 3], "target": [2, 4], "further": [2, 3, 4], "see": [2, 4], "avail": [2, 3, 4], "addition": 2, "shown": 2, "fix": [2, 3], "default": [2, 4], "quantifi": 2, "rank": 2, "easi": [2, 3], "addit": [2, 3], "quantit": 2, "among": 2, "per": [2, 3], "aggreg": 2, "heavili": 2, "plan": 2, "pertain": 2, "previous": [2, 3], "doe": [2, 3, 4], "cover": [2, 3], "edg": 2, "good": [2, 4], "bia": 2, "separ": [2, 3], "synthet": 2, "updat": [2, 3], "reflect": 2, "post": 2, "fair": 2, "timeout": 2, "variat": 2, "maxim": 2, "valu": [2, 3, 4], "success": 2, "inter": 2, "rater": 2, "scalabl": [2, 3], "weight": 2, "rel": 2, 
"priorit": 2, "normal": [2, 4], "absolut": [2, 4], "fail": 2, "confid": [2, 4], "interv": 2, "veri": 2, "tier": 2, "hollist": 2, "built": [2, 4], "mind": 2, "x": 2, "fast": 2, "promot": 2, "rapid": 2, "experiment": [2, 4], "iter": [2, 3], "final": [2, 3, 4], "keep": [2, 3], "itself": 2, "confirm": 2, "vi": 2, "categor": 2, "broad": 2, "categori": 2, "intrins": 2, "extrins": 2, "sequenc": 2, "perplex": 2, "given": [2, 3, 4], "variou": [2, 3, 4], "downstream": [2, 4], "directli": [2, 4], "valuabl": [2, 4], "generalis": 2, "interest": [2, 3, 4], "sinc": [2, 3], "think": 2, "term": [2, 3], "discrimin": 2, "distinguish": 2, "classifi": 2, "sentiment": [2, 4], "classif": [2, 4], "identifi": [2, 3, 4], "whether": [2, 3, 4], "true": [2, 3, 4], "fals": [2, 4], "synthesi": 2, "log": 2, "discret": 2, "recal": 2, "f1": 2, "match": [2, 4], "exact": 2, "prefix": 2, "translat": 2, "charact": [2, 3], "n": [2, 3], "gram": 2, "being": [2, 4], "guidelin": 2, "subsequ": 2, "section": [2, 3, 4], "tabl": [2, 3], "short": [2, 3], "wide": [2, 3, 4], "definit": [2, 4], "bilingu": 2, "understudi": 2, "overlap": [2, 3], "favor": [2, 4], "due": [2, 3], "breviti": 2, "penalti": 2, "insensit": 2, "semant": [2, 3], "high": [2, 3], "orient": 2, "gist": 2, "word": [2, 3, 4], "sentenc": [2, 3, 4], "focus": [2, 3, 4], "ignor": 2, "equival": 2, "influenc": 2, "meteor": 2, "synonym": 2, "stem": 2, "paraphras": 2, "alongsid": 2, "computation": [2, 3], "expens": 2, "databas": [2, 4], "cider": 2, "consensu": 2, "descript": [2, 4], "tf": 2, "idf": 2, "caption": 2, "outsid": 2, "reliant": 2, "corpu": 2, "statist": 2, "ter": 2, "edit": 2, "number": [2, 3, 4], "convert": [2, 4], "hypothesi": 2, "correct": [2, 4], "penal": 2, "bertscor": 2, "embed": [2, 3], "bert": 2, "spice": 2, "proposit": 2, "scene": 2, "graph": 2, "emphasi": 2, "onli": [2, 3, 4], "pure": 2, "textual": 2, "want": [2, 3], "extract": [2, 3, 4], "analyst": [2, 3], "prepar": [2, 3], "dictionari": [2, 4], "rouge_1": 2, "rouge_2": 2, "ideal": 
[2, 4], "humana": 2, "expert": [2, 3, 4], "cheaper": 2, "4o": [2, 3, 4], "mini": [2, 3, 4], "evaluate_summari": 2, "unigram": 2, "bigram": 2, "librari": [2, 3, 4], "absl": 2, "py": 2, "rouge_scor": 2, "generated_summari": 2, "reference_summari": 2, "arg": [2, 3], "dict": [2, 3], "google_bleu": 2, "bleu_scor": 2, "rouge1": 2, "rouge2": 2, "instanc": [2, 3], "arbitrari": 2, "chosen": 2, "sentence1": 2, "cat": 2, "sat": 2, "mat": 2, "sentence2": 2, "ate": 2, "3333333333333333": 2, "7272727272727272": 2, "4444444444444445": 2, "generate_summari": 2, "summir": 2, "correspond": [2, 4], "liner": 2, "excerpt": 2, "evaluate_summary_model": 2, "model_benchmark": 2, "models_test": 2, "benchmark_summari": 2, "model_summari": 2, "evaluation_result": 2, "line": 2, "name": [2, 3, 4], "zip": 2, "annual": 2, "regul": [2, 4], "stock": [2, 4], "corpor": 2, "govern": 2, "forward": 2, "analyz": [2, 3], "statu": 2, "concis": 2, "yet": [2, 3], "omit": [2, 4], "essenti": [2, 3], "element": [2, 4], "Its": 2, "adequ": 2, "verbos": 2, "relev": 2, "introduc": [2, 3, 4], "peripher": 2, "quit": [2, 4], "factual": 2, "overli": [2, 4], "simplifi": [2, 4], "miss": 2, "convei": [2, 3], "breadth": 2, "Of": 2, "cours": 2, "abov": 2, "vibe": 2, "visualize_prompt_comparison": 2, "visual": 2, "matplotlib": 2, "radar": 2, "plot": 2, "radar_plot": 2, "show": [2, 3, 4], "tmp": 2, "ipykernel_1652501": 2, "940173201": 2, "userwarn": 2, "figurecanvasagg": 2, "thu": 2, "put": 2, "closest": 2, "largest": 2, "deviat": [2, 4], "suggest": [2, 4], "least": 2, "establish": 2, "otherwis": 2, "qualit": 2, "driven": 2, "might": [2, 3, 4], "nuanc": [2, 3, 4], "especi": [2, 3, 4], "those": [2, 3, 4], "primarili": 2, "level": [2, 3, 4], "granular": [2, 3], "altern": [2, 3], "fall": 2, "judg": 2, "themselv": 2, "act": 2, "fluenci": 2, "interpret": 2, "refin": 2, "centric": 2, "flow": [2, 3], "express": [2, 4], "plain": 2, "ad": 2, "basemodel": [2, 4], "class": [2, 3, 4], "judgeevalu": 2, "expertis": 2, "evaluate_with_llm": 
2, "judge_model": 2, "candidate_summari": 2, "candid": 2, "specifi": [2, 3, 4], "wa": [2, 4], "written": 2, "grammat": 2, "y": 2, "z": 2, "w": [2, 3], "beta": [2, 4], "pars": [2, 4], "response_format": [2, 4], "benchmark_model": 2, "test_model": 2, "input_text": [2, 3], "tupl": 2, "iphon": [2, 4], "mac": [2, 4], "ipad": [2, 4], "incorpor": 2, "regard": 2, "regist": 2, "approxim": [2, 4], "6": [2, 3, 4], "trillion": [2, 4], "held": [2, 4], "affili": [2, 4], "billion": 2, "outstand": [2, 4], "octob": [2, 4], "18": [2, 4], "7": [2, 3], "8": [2, 3], "evals_list": 2, "1775618912": 2, "14": [2, 4], "difficulti": 2, "own": [2, 3], "overhead": 2, "infer": 2, "rational": 2, "effici": [2, 3, 4], "replac": 2, "significantli": [2, 3], "workflow": 2, "improv": [2, 3, 4], "domain": 2, "assessor": 2, "resourc": [2, 3], "intens": [2, 4], "standardis": 2, "varieti": 2, "some": [2, 3, 4], "popular": 2, "glue": 2, "collect": [2, 3], "entail": 2, "holist": 2, "view": 2, "superglu": 2, "successor": 2, "push": 2, "boundari": 2, "big": 2, "bench": 2, "collabor": 2, "encompass": 2, "sens": 2, "helm": 2, "flask": 2, "multitask": 2, "mmlu": 2, "57": 2, "social": 2, "humanev": 2, "livebench": 2, "mitig": [2, 3, 4], "releas": [2, 4], "monthli": 2, "leak": 2, "incorrect": 2, "ambigu": 2, "rememb": 2, "jason": 2, "wei": 2, "yi": [2, 4], "tai": 2, "rishi": 2, "bommasani": 2, "colin": 2, "raffel": 2, "barret": 2, "zoph": 2, "sebastian": 2, "borgeaud": 2, "dani": 2, "yogatama": 2, "maarten": 2, "bosma": 2, "denni": 2, "zhou": 2, "donald": 2, "metzler": 2, "ed": 2, "h": 2, "chi": 2, "tatsunori": 2, "hashimoto": 2, "oriol": 2, "vinyal": 2, "perci": 2, "liang": 2, "jeff": 2, "dean": 2, "william": 2, "fedu": 2, "2022": 2, "url": [2, 4], "arxiv": [2, 4], "org": [2, 4], "ab": [2, 4], "2206": 2, "07682": 2, "go": [3, 4], "far": 3, "possibli": 3, "find": [3, 4], "eliot": 3, "english": 3, "rule": 3, "thumb": 3, "\u00be": 3, "max_output_token": 3, "modern": 3, "maximum": 3, "4096": 3, "16384": 3, 
"contrari": 3, "summar": 3, "surpass": 3, "instead": [3, 4], "stop": 3, "mid": 3, "truncat": 3, "max_input_token": 3, "input_cost_per_token": 3, "output_cost_per_token": 3, "meta": 3, "llama3": 3, "11b": 3, "instruct": [3, 4], "v1": 3, "128000": 3, "5e": 3, "sonnet": 3, "20241022": 3, "8192": 3, "200000": 3, "3e": 3, "0613": 3, "6e": 3, "04": 3, "09": 3, "1e": 3, "gemini": 3, "flash": 3, "002": 3, "1048576": 3, "pro": 3, "2097152": 3, "05e": 3, "pose": [3, 4], "challeng": 3, "incomplet": 3, "extens": [3, 4], "articl": 3, "abruptli": 3, "cut": 3, "off": [3, 4], "disrupt": 3, "shallow": 3, "thorough": 3, "receiv": 3, "partial": 3, "dissatisfact": 3, "frustrat": 3, "educ": 3, "creation": 3, "feasibl": 3, "split": 3, "previou": 3, "10k": 3, "sec": [3, 4], "schemat": 3, "represent": 3, "diagram": 3, "charactertextsplitt": 3, "tiktoken": 3, "sequenti": 3, "chain": 3, "newlin": 3, "There": 3, "situat": 3, "broadli": [3, 4], "decid": 3, "sure": 3, "lost": 3, "path": 3, "mani": [3, 4], "cheap": 3, "speciali": 3, "awar": 3, "advantag": [3, 4], "sophist": 3, "naiv": 3, "period": 3, "nltk": 3, "spaci": 3, "recurs": 3, "divid": 3, "hierarch": 3, "manner": [3, 4], "made": [3, 4], "talk": 3, "theme": 3, "topic": [3, 4], "util": 3, "count": 3, "get_chunk": 3, "chunk_siz": 3, "chunk_overlap": 3, "langchain_text_splitt": 3, "text_splitt": 3, "from_tiktoken_encod": 3, "split_text": 3, "serv": [3, 4], "persona": 3, "assum": 3, "task": [3, 4], "action": 3, "actual": [3, 4], "langchain_cor": [3, 4], "prompttempl": 3, "get_base_prompt_templ": 3, "base_prompt": 3, "from_templ": 3, "llmchain": 3, "togeth": 3, "parser": [3, 4], "output_pars": 3, "stroutputpars": 3, "langchain_commun": 3, "chat_model": 3, "chatlitellm": 3, "get_llm_chain": 3, "prompt_templ": [3, 4], "llm_chain": [3, 4], "api_key_label": 3, "upper": 3, "_api_kei": 3, "api_kei": 3, "get_dynamic_prompt_templ": 3, "get_dynamic_prompt_param": 3, "prompt_param": 3, "part_idx": 3, "total_part": 3, "chat_context": 3, "origin": [3, 
4], "part": [3, 4], "total": [3, 4], "param": 3, "dynamic_prompt_param": 3, "copi": 3, "save": 3, "introduct": 3, "elif": 3, "last": [3, 4], "second": 3, "main": [3, 4], "els": 3, "merg": 3, "concaten": 3, "generate_report": 3, "input_cont": 3, "llm_model_nam": 3, "report_part": 3, "num_part": 3, "len": 3, "dinam": 3, "priovid": 3, "enumer": 3, "invok": [3, 4], "cummul": 3, "join": 3, "max_chunk_s": 3, "max_chunk_overlap": 3, "latest": [3, 4], "readabl": 3, "move": 3, "local": [3, 4], "apple_report": 3, "300": 3, "posit": [3, 4], "disclos": 3, "state": 3, "luation": 3, "oblig": 3, "cash": 3, "disciplin": 3, "deeper": 3, "few": [3, 4], "smooth": 3, "upon": 3, "subhead": 3, "clariti": 3, "document": [3, 4], "adher": [3, 4], "revenu": [3, 4], "segment": [3, 4], "liquid": 3, "capit": [3, 4], "inclus": 3, "despit": [3, 4], "depth": 3, "evalu": 3, "overlook": 3, "fit": 3, "within": [3, 4], "meaning": [3, 4], "preserv": 3, "easier": [3, 4], "preprocess": 3, "enhanc": 3, "necessit": 3, "meticul": 3, "retain": 3, "necessari": 3, "seamlessli": 3, "circumv": 3, "therebi": 3, "overal": [3, 4], "escal": 3, "frequenc": 3, "volum": 3, "bottleneck": 3, "latenc": 3, "reduc": 3, "friendli": 3, "mustafa": 3, "suleyman": 3, "infinit": 3, "memori": 3, "amount": [3, 4], "fewer": 3, "compress": 3, "progress": 3, "condens": 3, "adapt": 3, "adjust": [3, 4], "flexibl": [3, 4], "constrain": [3, 4], "versatil": 3, "drive": 3, "grace": 3, "fallback": 3, "empow": 3, "crucial": [3, 4], "stai": 3, "full": [3, 4], "splitter": 3, "freedom": 4, "thrive": 4, "julia": 4, "cameron": 4, "excel": 4, "easili": 4, "sometim": 4, "unstructur": 4, "notebook": 4, "overrid": 4, "response_cont": 4, "wow": 4, "lot": 4, "breakdown": 4, "stream": 4, "portfolio": 4, "impress": 4, "trend": 4, "notic": 4, "march": 4, "29": 4, "huge": 4, "investor": 4, "figur": 4, "compli": 4, "ye": 4, "accur": 4, "date": 4, "transpar": 4, "industri": 4, "serious": 4, "is_json": 4, "myjson": 4, "except": 4, "valueerror": 4, "clearli": 
4, "obtain": 4, "lack": 4, "emploi": 4, "schema": 4, "blueprint": 4, "achiev": 4, "nativ": 4, "regular": 4, "dedic": 4, "enforc": 4, "json_format": 4, "person1": 4, "alic": 4, "q1": 4, "20": 4, "person2": 4, "bob": 4, "net": 4, "margin": 4, "materi": 4, "though": 4, "suffici": 4, "nest": 4, "restrict": 4, "todai": 4, "programmat": 4, "depict": 4, "thellm": 4, "via": 4, "unend": 4, "whitespac": 4, "until": 4, "forget": 4, "throw": 4, "string": 4, "appear": 4, "somewher": 4, "json_object": 4, "628": 4, "553": 4, "000": 4, "sheer": 4, "mention": 4, "115": 4, "823": 4, "circul": 4, "plai": 4, "googl": 4, "vertex": 4, "suppli": 4, "so": 4, "worri": 4, "enum": 4, "benefit": 4, "No": 4, "incorrectli": 4, "refus": 4, "simpler": 4, "strongli": 4, "entiti": 4, "ii": 4, "place": 4, "doc": 4, "07": 4, "08": 4, "06": 4, "later": 4, "secextract": 4, "mentioned_ent": 4, "mentioned_plac": 4, "extract_from_sec_fil": 4, "sec_filing_text": 4, "explan": 4, "hint": 4, "send": 4, "attribut": 4, "conform": 4, "prompt_extract": 4, "sec_extract": 4, "nasdaq": 4, "llc": 4, "washington": 4, "c": 4, "cupertino": 4, "usabl": 4, "beg": 4, "abstract": 4, "with_structured_output": 4, "runnabl": 4, "typeddict": 4, "qu": 4, "langchain_openai": 4, "chatopenai": 4, "chatprompttempl": 4, "extract_from_sec_filing_langchain": 4, "structured_llm": 4, "from_messag": 4, "sec_extraction_langchain": 4, "found": 4, "out24": 4, "under": 4, "hood": 4, "logit": 4, "raw": 4, "neural": 4, "network": 4, "prefer": 4, "fine": 4, "grain": 4, "regex": 4, "proprietari": 4, "vllm": 4, "qwen2": 4, "5b": 4, "lightweight": 4, "alibaba": 4, "cloud": 4, "strong": 4, "small": 4, "enough": 4, "hug": 4, "qwen": 4, "top": 4, "100": 4, "label": 4, "assist": 4, "neg": 4, "unexpect": 4, "malform": 4, "back": 4, "pass": 4, "modul": 4, "sec_extraction_outlin": 4, "zsp": 4, "zicorp": 4, "phenomenon": 4, "were": 4, "tune": 4, "simplic": 4, "v": 4, "greater": 4, "steeper": 4, "curv": 4, "wrapper": 4, "wider": 4, "structuredoutputpars": 
4, "done": 4, "know": 4, "exactli": 4, "field": 4, "chanc": 4, "connect": 4, "highli": 4, "encourag": 4, "correctli": 4, "xml": 4, "area": 4, "me": 4, "speak": 4, "freeli": 4, "studi": 4, "twt": 4, "24": 4, "impos": 4, "evid": 4, "aid24": 4, "degrad": 4, "decod": 4, "multi": 4, "thought": 4, "strict": 4, "hinder": 4, "outweigh": 4, "team": 4, "rebutt": 4, "argu": 4, "reproduct": 4, "paper": 4, "paint": 4, "pictur": 4, "publicli": 4, "independ": 4, "verif": 4, "dot24": 4, "flaw": 4, "believ": 4, "led": 4, "inaccur": 4, "reconcil": 4, "uneven": 4, "didn": 4, "properli": 4, "conflat": 4, "argument": 4, "trade": 4, "surround": 4, "drawback": 4, "unlock": 4, "thank": 4, "pfiffer": 4, "hi": 4, "feedback": 4, "aider": 4, "html": 4, "dottxt": 4, "sai": 4, "demo": 4, "tree": 4, "safe": 4, "io": 4, "zhi": 4, "rui": 4, "tam": 4, "cheng": 4, "kuang": 4, "wu": 4, "lin": 4, "tsai": 4, "chieh": 4, "yen": 4, "hung": 4, "lee": 4, "yun": 4, "nung": 4, "chen": 4, "2408": 4, "02442": 4}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"introduct": [0, 1], "content": [0, 2, 3, 4], "core": 0, "challeng": [0, 1, 2, 4], "we": 0, "ll": 0, "address": 0, "A": [0, 1, 4], "practic": [0, 4], "approach": 0, "note": 0, "perspect": 0, "who": 0, "thi": 0, "book": 0, "i": 0, "For": 0, "outcom": 0, "prerequisit": 0, "set": 0, "up": 0, "your": 0, "environ": 0, "python": 0, "setup": 0, "api": [0, 4], "kei": [0, 2, 3], "configur": 0, "code": 0, "repositori": 0, "troubleshoot": 0, "common": 0, "issu": 0, "about": 0, "author": 0, "": 0, "tame": 1, "larg": 1, "languag": 1, "model": [1, 2, 3, 4], "open": 1, "sourc": 1, "softwar": [1, 2], "chapter": 1, "1": [1, 3], "2": [1, 3], "wrestl": [1, 4], "structur": [1, 4], "output": [1, 3, 4], "3": [1, 3], "input": 1, "size": [1, 3], "length": [1, 3], "limit": [1, 3], "4": [1, 3], "5": 1, "evalu": [1, 2], "llm": [1, 2, 4], "base": [1, 2, 3], "applic": [1, 2], "6": 1, "hallucin": 1, "The": [1, 2, 4], "realiti": 1, "gap": 1, "7": 1, "safeti": 1, 
"concern": 1, "8": 1, "cost": [1, 3], "factor": 1, "9": 1, "break": 1, "free": 1, "from": 1, "cloud": 1, "provid": [1, 4], "appendix": 1, "tool": [1, 2, 4], "resourc": 1, "non": 2, "determinist": 2, "machin": 2, "temperatur": 2, "sampl": 2, "spectrum": 2, "emerg": 2, "properti": 2, "problem": [2, 3, 4], "statement": [2, 3, 4], "eval": 2, "tradit": 2, "v": 2, "design": 2, "conceptu": 2, "overview": 2, "consider": [2, 3], "compon": 2, "metric": 2, "gener": [2, 3, 4], "task": 2, "exampl": [2, 3, 4], "bleu": 2, "roug": 2, "sec": 2, "file": 2, "summar": 2, "human": 2, "leaderboard": 2, "refer": [2, 3, 4], "what": 3, "ar": 3, "token": 3, "comparison": 3, "across": 3, "chunk": 3, "contextu": 3, "link": 3, "long": 3, "form": 3, "step": 3, "write": 3, "prompt": [3, 4], "templat": 3, "construct": 3, "dynam": 3, "paramet": 3, "report": 3, "usag": 3, "discuss": [3, 4], "implic": 3, "futur": 3, "conclus": [3, 4], "solut": 4, "strategi": 4, "techniqu": 4, "One": 4, "shot": 4, "specif": 4, "json": 4, "mode": 4, "langchain": 4, "outlin": 4, "simpl": 4, "multipl": 4, "choic": 4, "pydant": 4, "compar": 4, "best": 4, "ongo": 4, "debat": 4, "acknowledg": 4}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 57}, "alltitles": {"Introduction": [[0, "introduction"]], "Contents": [[0, "contents"], [2, "contents"], [3, "contents"], [4, "contents"]], "Core Challenges We\u2019ll Address": [[0, "core-challenges-we-ll-address"]], "A Practical Approach": [[0, "a-practical-approach"]], "A Note on Perspective": [[0, "a-note-on-perspective"]], "Who This Book Is For": [[0, "who-this-book-is-for"]], "Outcomes": [[0, "outcomes"]], "Prerequisites": [[0, "prerequisites"]], "Setting Up Your Environment": 
[[0, "setting-up-your-environment"]], "Python Environment Setup": [[0, "python-environment-setup"]], "API Keys Configuration": [[0, "api-keys-configuration"]], "Code Repository": [[0, "code-repository"]], "Troubleshooting Common Issues": [[0, "troubleshooting-common-issues"]], "About the Author(s)": [[0, "about-the-author-s"]], "Taming Large Language Models with Open Source Software": [[1, "taming-large-language-models-with-open-source-software"]], "Chapter 1: Introduction": [[1, "chapter-1-introduction"]], "Chapter 2: Wrestling with Structured Output": [[1, "chapter-2-wrestling-with-structured-output"]], "Chapter 3: Input Size and Length Limitations": [[1, "chapter-3-input-size-and-length-limitations"]], "Chapter 4: Output Size and Length Limitations": [[1, "chapter-4-output-size-and-length-limitations"]], "Chapter 5: Challenges of Evaluating LLM-based Applications": [[1, "chapter-5-challenges-of-evaluating-llm-based-applications"]], "Chapter 6: Hallucination: The Reality Gap": [[1, "chapter-6-hallucination-the-reality-gap"]], "Chapter 7: Safety Concerns": [[1, "chapter-7-safety-concerns"]], "Chapter 8: The Cost Factor": [[1, "chapter-8-the-cost-factor"]], "Chapter 9: Breaking Free from Cloud Providers": [[1, "chapter-9-breaking-free-from-cloud-providers"]], "Appendix A: Tools and Resources": [[1, "appendix-a-tools-and-resources"]], "Challenges of Evaluating LLM-based Applications": [[2, "challenges-of-evaluating-llm-based-applications"]], "Non-Deterministic Machines": [[2, "non-deterministic-machines"]], "Temperature and Sampling": [[2, "temperature-and-sampling"]], "The Temperature Spectrum": [[2, "the-temperature-spectrum"]], "Emerging Properties": [[2, "emerging-properties"]], "Problem Statement": [[2, "problem-statement"], [3, "problem-statement"], [4, "problem-statement"]], "Evals of Traditional Software vs LLMs": [[2, "evals-table"]], "Evals Design": [[2, "evals-design"]], "Conceptual Overview": [[2, "conceptual-overview"]], "Design Considerations": [[2, 
"design-considerations"]], "Key Components": [[2, "key-components"]], "Metrics": [[2, "metrics"]], "Key Metrics for Evaluating Generative Tasks": [[2, "key-metrics"]], "Example: BLEU and ROUGE for SEC Filing Summarization": [[2, "example-bleu-and-rouge-for-sec-filing-summarization"]], "Considerations": [[2, "considerations"]], "Evaluators": [[2, "evaluators"]], "Model-Based Evaluation": [[2, "model-based-evaluation"]], "Human-Based Evaluation": [[2, "human-based-evaluation"]], "Leaderboard": [[2, "leaderboard"]], "Tools": [[2, "tools"]], "References": [[2, "references"], [3, "references"], [4, "references"]], "Output Size Limitations": [[3, "output-size-limitations"]], "What are Token Limits?": [[3, "what-are-token-limits"]], "Token Cost and Length Limitation Comparison Across Key Models": [[3, "token-cost-table"]], "Content Chunking with Contextual Linking": [[3, "content-chunking-with-contextual-linking"]], "Generating long-form content": [[3, "generating-long-form-content"]], "Step 1: Chunking the Content": [[3, "step-1-chunking-the-content"]], "Step 2: Writing the Base Prompt Template": [[3, "step-2-writing-the-base-prompt-template"]], "Step 3: Constructing Dynamic Prompt Parameters": [[3, "step-3-constructing-dynamic-prompt-parameters"]], "Step 4: Generating the Report": [[3, "step-4-generating-the-report"]], "Example Usage": [[3, "example-usage"]], "Discussion": [[3, "discussion"], [4, "discussion"]], "Implications": [[3, "implications"]], "Future Considerations": [[3, "future-considerations"]], "Conclusion": [[3, "conclusion"], [4, "conclusion"]], "Wrestling with Structured Output": [[4, "wrestling-with-structured-output"]], "The Structured Output Challenges": [[4, "the-structured-output-challenges"]], "Solutions": [[4, "solutions"]], "Strategies": [[4, "strategies"]], "Techniques and Tools": [[4, "techniques-and-tools"]], "One-Shot Prompts": [[4, "one-shot-prompts"]], "Structured Output with Provider-Specific APIs": [[4, 
"structured-output-with-provider-specific-apis"]], "JSON Mode": [[4, "json-mode"]], "LangChain": [[4, "langchain"]], "Outlines": [[4, "outlines"]], "A Simple Example: Multiple Choice Generation": [[4, "a-simple-example-multiple-choice-generation"]], "Pydantic model": [[4, "pydantic-model"]], "Comparing Solutions": [[4, "comparing-solutions"]], "Best Practices": [[4, "best-practices"]], "Ongoing Debate on LLMs Structured Output": [[4, "ongoing-debate-on-llms-structured-output"]], "Acknowledgements": [[4, "acknowledgements"]]}, "indexentries": {}}) \ No newline at end of file diff --git a/tamingllms/_build/jupyter_execute/markdown/intro.ipynb b/tamingllms/_build/jupyter_execute/markdown/intro.ipynb index 6cbe565..cbb1594 100644 --- a/tamingllms/_build/jupyter_execute/markdown/intro.ipynb +++ b/tamingllms/_build/jupyter_execute/markdown/intro.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "91e6d8fd", + "id": "7ccaec6d", "metadata": {}, "source": [ "(intro)=\n", @@ -58,10 +58,11 @@ "\n", "## Who This Book Is For\n", "\n", - "This book is designed for:\n", + "This book is inteded to Software Developers taking their first steps with Large Language Models. It provides critical insights into the practical challenges of LLM implementation, along with guidance on leveraging open source tools and frameworks to avoid common pitfalls that could derail projects. The goal is to help developers understand and address these challenges early, before they become costly problems too late in the software development lifecycle. 
\n", "\n", - "- Software Engineers building LLM-powered applications\n", - "- Product Managers leading AI initiatives\n", + "A broader audience for this book includes:\n", + "\n", + "- Technical Product Managers leading AI initiatives\n", "- Technical Leaders making architectural decisions\n", "- Anyone seeking to understand the practical challenges of working with LLMs\n", "\n", diff --git a/tamingllms/markdown/intro.md b/tamingllms/markdown/intro.md index 8166f6b..cc89eaf 100644 --- a/tamingllms/markdown/intro.md +++ b/tamingllms/markdown/intro.md @@ -62,10 +62,11 @@ The current discourse around LLMs tends toward extremes—either uncritical enth ## Who This Book Is For -This book is designed for: +This book is intended for Software Developers taking their first steps with Large Language Models. It provides critical insights into the practical challenges of LLM implementation, along with guidance on leveraging open source tools and frameworks to avoid common pitfalls that could derail projects. The goal is to help developers understand and address these challenges early, before they become costly problems late in the software development lifecycle. -- Software Engineers building LLM-powered applications -- Product Managers leading AI initiatives +A broader audience for this book includes: + +- Technical Product Managers leading AI initiatives - Technical Leaders making architectural decisions - Anyone seeking to understand the practical challenges of working with LLMs