Toxicity

The Toxicity scanner helps prevent harmful content from entering your system or being echoed back to users unfiltered.

Example

import os

from testsavant.guard import InputGuard
from testsavant.guard.input_scanners import Toxicity

# It's recommended to set these as environment variables
# os.environ["TEST_SAVANT_API_KEY"] = "YOUR_API_KEY"
# os.environ["TEST_SAVANT_PROJECT_ID"] = "YOUR_PROJECT_ID"

api_key = os.environ.get("TEST_SAVANT_API_KEY")
project_id = os.environ.get("TEST_SAVANT_PROJECT_ID")

# Create an input guard and attach the Toxicity scanner with the default threshold.
input_guard = InputGuard(API_KEY=api_key, PROJECT_ID=project_id)
input_guard.add_scanner(Toxicity(threshold=0.5))

prompts = [
    "you fucking idiot, you are so stupid",
    "you are so smart",
    "you are so stupid",
    "you are so dumb"
]

# Scan each prompt; is_valid is False when the scanner flags it as toxic.
for prompt in prompts:
    result = input_guard.scan(prompt)
    if result.is_valid:
        print(f"'{prompt}' is a valid prompt")
    else:
        print(f"'{prompt}' is an invalid prompt")

# Output:
# 'you fucking idiot, you are so stupid' is an invalid prompt
# 'you are so smart' is a valid prompt
# 'you are so stupid' is an invalid prompt
# 'you are so dumb' is an invalid prompt
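
In practice you would gate the call to your model on the scan result. The snippet below is a minimal sketch that reuses the input_guard configured above; call_llm is a hypothetical placeholder for your own model call.

def answer(prompt: str) -> str:
    # Reject toxic input before it reaches the model.
    result = input_guard.scan(prompt)
    if not result.is_valid:
        return "Sorry, I can't help with that request."
    # call_llm stands in for your LLM client; replace it with your own call.
    return call_llm(prompt)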

Parameters

  • threshold: float (default: 0.5) — toxicity score above which the input is flagged; lower values make the scanner stricter (see the sketch below).
  • tag: str (default: "base") — label used to group rule configurations.
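
A minimal sketch of a stricter configuration, assuming the threshold is the toxicity score above which input is flagged; the 0.3 value and the "strict-toxicity" tag are illustrative, not defaults.

strict_guard = InputGuard(API_KEY=api_key, PROJECT_ID=project_id)
# A lower threshold flags content at lower toxicity scores, i.e. stricter filtering.
# The tag is a label used to group this rule configuration.
strict_guard.add_scanner(Toxicity(threshold=0.3, tag="strict-toxicity"))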

Related