NVIDIA garak Tutorial: Build a Complete Defensive LLM Red-Teaming Workflow with Custom Probes and Detectors

by CryptoExpert
Coinmama


# Banner marking the start of the analysis stage of the walkthrough.
print("\n########## 5. ANALYSIS ##########")
# numpy/pandas are used below to build and score the evaluation table.
import numpy as np
import pandas as pd
def find_latest_report():
    """Return the newest non-empty ``*report.jsonl`` file, or ``None``.

    Searches recursively under ``~/.local/share/garak/garak_runs``,
    ``~/.cache/garak`` and the current working directory. Files of size
    zero are ignored, as are entries that cannot be stat'ed (for example a
    dangling symlink, or a file deleted between globbing and inspection).

    Returns:
        The path (as produced by ``glob``) of the candidate with the most
        recent modification time, or ``None`` when no candidate exists.
    """
    search_roots = [
        os.path.expanduser("~/.local/share/garak/garak_runs"),
        os.path.expanduser("~/.cache/garak"),
        ".",
    ]
    latest_path = None
    latest_mtime = None
    for base in search_roots:
        pattern = os.path.join(base, "**", "*report.jsonl")
        for path in glob.glob(pattern, recursive=True):
            try:
                # One stat call yields both the size and the mtime.
                info = os.stat(path)
            except OSError:
                # Unreadable or vanished entry: not a usable report.
                continue
            if info.st_size == 0:
                continue
            # Strict ">" keeps the first-seen path on mtime ties, matching
            # what max(..., key=os.path.getmtime) would return.
            if latest_mtime is None or info.st_mtime > latest_mtime:
                latest_path, latest_mtime = path, info.st_mtime
    return latest_path
# Pick the report to analyse. `report_path` is read before it is assigned
# here, so it must already be defined earlier in the script (not visible in
# this section); when it is falsy, fall back to the newest report on disk.
report_path = report_path or find_latest_report()
print("Analysing:", report_path)

evaluations = None
if not report_path:
    # Without this guard the fallback parser below would call open(None)
    # and abort the whole script with a TypeError.
    print("No garak report found; skipping analysis.")
else:
    try:
        # Preferred path: let garak's own Report class build the table.
        from garak.report import Report
        rep = Report(report_path).load().get_evaluations()
        evaluations = rep.evaluations.copy()
        print("\n— Per-probe mean SAFETY score (garak.report.Report) —")
        print(rep.scores.round(1).to_string())
    except Exception as e:
        # Best-effort fallback: read the JSONL report directly and keep only
        # the "eval" entries.
        print("garak.report.Report unavailable, falling back to manual parse:", e)
        rows = []
        with open(report_path, encoding="utf-8") as f:
            for line in f:
                try:
                    r = json.loads(line)
                except json.JSONDecodeError:
                    continue  # skip blank or corrupt lines
                # Only JSON objects can carry an "entry_type" field.
                if isinstance(r, dict) and r.get("entry_type") == "eval":
                    rows.append(r)
        evaluations = pd.DataFrame(rows)
        if not evaluations.empty:
            # score = percentage of evaluated attempts that passed;
            # rows with nothing evaluated get 0.0 instead of dividing by zero.
            evaluations["score"] = np.where(
                evaluations["total_evaluated"] != 0,
                100 * evaluations["passed"] / evaluations["total_evaluated"],
                0.0,
            )

if evaluations is not None and not evaluations.empty:
    # Attack success rate is the complement of the safety score.
    # NOTE(review): assumes the Report-built frame also has a "score"
    # column -- confirm against the installed garak version.
    evaluations["asr_%"] = (100 - evaluations["score"]).round(1)
    view = evaluations[["probe", "detector", "passed",
                        "total_evaluated", "score", "asr_%"]].copy()
    view = view.rename(columns={"score": "safe_%"})
    view["safe_%"] = view["safe_%"].round(1)
    view = view.sort_values("asr_%", ascending=False)
    print("\n— Per probe/detector (higher asr_% = more vulnerable) —")
    print(view.to_string(index=False))
    try:
        # Plotting is optional; any failure (e.g. matplotlib not installed)
        # is reported and skipped rather than aborting the analysis.
        import matplotlib.pyplot as plt
        labels = (view["probe"] + "\n" + view["detector"]).tolist()
        plt.figure(figsize=(8, 0.55 * len(view) + 1.5))
        plt.barh(labels, view["asr_%"], color="#76b900")
        # Keep the table order: highest asr_% at the top of the chart.
        plt.gca().invert_yaxis()
        plt.xlabel("Attack Success Rate (%)")
        plt.xlim(0, 100)
        plt.title("garak — vulnerability by probe/detector")
        plt.tight_layout()
        plt.show()
    except Exception as e:
        print("plot skipped:", e)



Source link

You may also like