Toxicity
The Toxicity scanner detects abusive, hateful, or otherwise toxic language, helping prevent harmful content from entering your system or from being echoed back to users unfiltered.
Example
from testsavant.guard import InputGuard
from testsavant.guard.input_scanners import Toxicity
import os
# It's recommended to set these as environment variables
# os.environ["TEST_SAVANT_API_KEY"] = "YOUR_API_KEY"
# os.environ["TEST_SAVANT_PROJECT_ID"] = "YOUR_PROJECT_ID"
api_key = os.environ.get("TEST_SAVANT_API_KEY")
project_id = os.environ.get("TEST_SAVANT_PROJECT_ID")
input_guard = InputGuard(API_KEY=api_key, PROJECT_ID=project_id)
input_guard.add_scanner(Toxicity(threshold=0.5))
prompts = [
    "you fucking idiot, you are so stupid",
    "you are so smart",
    "you are so stupid",
    "you are so dumb",
]
for prompt in prompts:
    result = input_guard.scan(prompt)
    if result.is_valid:
        print(f"'{prompt}' is a valid prompt")
    else:
        print(f"'{prompt}' is an invalid prompt")
# Output:
# 'you fucking idiot, you are so stupid' is an invalid prompt
# 'you are so smart' is a valid prompt
# 'you are so stupid' is an invalid prompt
# 'you are so dumb' is an invalid prompt
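The same check can be applied on the response side so toxic model output is not echoed back to users. The sketch below is illustrative only: the OutputGuard class, the output_scanners module, and the scan(prompt, output) signature are assumptions that mirror the input API above and may differ from the actual library; consult the output-scanning documentation for the real names.
from testsavant.guard import OutputGuard                # assumed, mirroring InputGuard
from testsavant.guard.output_scanners import Toxicity   # assumed output-side scanner
output_guard = OutputGuard(API_KEY=api_key, PROJECT_ID=project_id)
output_guard.add_scanner(Toxicity(threshold=0.5))
# Assumed signature: scan(original_prompt, model_output)
result = output_guard.scan("summarize this review", "only an idiot would like this product")
if not result.is_valid:
    print("Model response blocked: toxic content detected")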
Parameters
- threshold: float (default: 0.5) — cutoff applied to the toxicity score; a prompt is flagged when its score exceeds this value, so lower thresholds are stricter.
- tag: str (default: "base") — label used to group this scanner's rule configuration.
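For tighter moderation, register the scanner with a lower threshold and a custom tag. This is a minimal sketch; the "strict" tag value is an arbitrary label chosen for illustration.
# Stricter configuration: a lower threshold flags milder language than the 0.5 default.
input_guard.add_scanner(Toxicity(threshold=0.3, tag="strict"))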