0% found this document useful (0 votes)
38 views2 pages

Wandb

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
38 views2 pages

Wandb

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 2

!pip install cohere -q


!pip install aif360 -q
!pip install BlackBoxAuditing -q
!pip install wandb -q
import os
import cohere
import urllib.request
from aif360.datasets import AdultDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing
import wandb

# Initialize WandB
wandb.init(project="bias-mitigation-cohere")

# Set your Cohere API key
#
# The key is read from the environment instead of being embedded in the
# source: a credential committed to a shared notebook is visible to every
# reader. Any key that was previously hard-coded here should be treated as
# compromised and rotated.
cohere_api_key = os.environ.get("COHERE_API_KEY") or os.environ.get("CO_API_KEY")
if not cohere_api_key:
    raise RuntimeError(
        "Cohere API key not found; set the COHERE_API_KEY environment variable."
    )
co = cohere.Client(cohere_api_key)

# Function to retrieve information from the Adult dataset


def retrieve_information(query):
    """Summarise the label distribution of the Adult dataset.

    Args:
        query: The user's question. It is accepted for interface symmetry
            with the other pipeline steps but is not used; the same summary
            is produced for every query.

    Returns:
        A sentence stating the number of samples, the number of positive
        samples and the positive rate (two decimal places).
    """
    dataset = AdultDataset()
    num_samples = len(dataset.labels)
    # Summing the label column counts the positives.
    # NOTE(review): assumes labels are encoded 0/1 with 1 as the positive
    # class -- confirm against the dataset's favorable_label.
    # The cast to int keeps the count from being rendered as e.g. "123.0".
    num_positive_samples = int(dataset.labels.sum())
    # Guard against an empty dataset rather than dividing by zero.
    positive_rate = num_positive_samples / num_samples if num_samples else 0.0
    return (
        f"The Adult dataset contains {num_samples} samples, of which "
        f"{num_positive_samples} are positive. "
        f"The positive rate is {positive_rate:.2f}."
    )

# Function to augment retrieved information using Cohere’s model


def augment_information(retrieved_information, query):
    """Ask the Cohere model to expand on the retrieved facts for a query.

    Args:
        retrieved_information: Text produced by the retrieval step.
        query: The user's question the additional detail should relate to.

    Returns:
        The text of the first generation returned by the model, with
        surrounding whitespace stripped.
    """
    # Built from adjacent literals so that no string is split mid-line; the
    # sections are separated by blank lines ("\n\n") in the final prompt.
    prompt = (
        "Based on the following information, provide additional information "
        f"relevant to the query: '{query}'\n\n"
        f"Information: {retrieved_information}\n\n"
        "Additional information:"
    )
    response = co.generate(
        model="command-xlarge-nightly",
        prompt=prompt,
        max_tokens=150,
        temperature=0.5,
    )
    return response.generations[0].text.strip()

# Function to generate a response to a user query


def generate_response(query):
    """Answer a user query via retrieve -> augment -> generate.

    Args:
        query: The user's question.

    Returns:
        The text of the first generation returned by the model for the final
        answer prompt, with surrounding whitespace stripped.
    """
    retrieved_information = retrieve_information(query)
    augmented_information = augment_information(retrieved_information, query)
    # Built from adjacent literals so that no string is split mid-line; the
    # sections are separated by blank lines ("\n\n") in the final prompt.
    prompt = (
        "Answer the following question based on the information provided:\n\n"
        f"Question: {query}\n\n"
        f"Information: {augmented_information}\n\n"
        "Answer:"
    )
    response = co.generate(
        model="command-xlarge-nightly",
        prompt=prompt,
        max_tokens=150,
        temperature=0.5,
    )
    return response.generations[0].text.strip()

# Dataset URLs for downloading


# Raw Adult (Census Income) files, fetched directly from the UCI archive.
# The final path component of each URL is used as the local file name.
urls = [
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test",
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names",
]

# Directory to save dataset files
#
# The files belong in the installed aif360 package's data/raw/adult folder.
# Resolve that folder from the package itself rather than hard-coding one
# interpreter's dist-packages path, which breaks on any other Python version
# or virtual environment.
import aif360

save_dir = os.path.join(os.path.dirname(aif360.__file__), "data", "raw", "adult")
os.makedirs(save_dir, exist_ok=True)

# Download dataset files
for url in urls:
    # The last URL path component doubles as the local file name.
    filename = url.split("/")[-1]
    save_path = os.path.join(save_dir, filename)
    urllib.request.urlretrieve(url, save_path)
    print(f"Downloaded {filename} to {save_path}")

# Load and test for bias in the original dataset
#
# AIF360 encodes a protected attribute as 1 for the privileged class and 0 for
# all others, so the privileged group is race == 1 and the unprivileged group
# is race == 0. With the two swapped, disparate impact comes out as the
# reciprocal of the intended ratio. The groups are defined once so the metric
# and the mitigation step cannot drift apart.
# NOTE(review): relies on AdultDataset's default privileged_classes -- confirm
# if the dataset is constructed with custom arguments.
privileged_groups = [{"race": 1}]
unprivileged_groups = [{"race": 0}]

dataset = AdultDataset()
metric_orig = BinaryLabelDatasetMetric(
    dataset,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)
orig_disparate_impact = metric_orig.disparate_impact()
print("Original dataset")
print("Disparate impact: ", orig_disparate_impact)

# Log metrics to WandB
wandb.log({"Original Disparate Impact": orig_disparate_impact})

# Mitigate bias using Reweighing
rw = Reweighing(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)
dataset_transf = rw.fit_transform(dataset)

# Testing for bias after mitigation
metric_transf = BinaryLabelDatasetMetric(
    dataset_transf,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)
transf_disparate_impact = metric_transf.disparate_impact()
print("Transformed dataset")
print("Disparate impact: ", transf_disparate_impact)

# Log transformed dataset metrics to WandB
wandb.log({"Transformed Disparate Impact": transf_disparate_impact})

# Example usage: run one question through the full pipeline and show the
# answer under a heading line.
query = "What is the positive rate of the Adult dataset?"
response = generate_response(query)
print("Response to user query:", response, sep="\n")

You might also like