Code2pdf 67c734433f773

The document provides Python code for loading and formatting instruction-tuning data using the Alpaca prompt structure, intended for federated fine-tuning with Flower Datasets and TRL. It includes functions for tokenization, completion-only data collation, and loading federated dataset partitions. Additionally, there is a utility function that replaces specific characters in dictionary keys.


from transformers import AutoTokenizer
from trl import DataCollatorForCompletionOnlyLM

from flwr_datasets import FederatedDataset
from flwr_datasets.partitioner import IidPartitioner

FDS = None  # Cache FederatedDataset so it is only instantiated once per process

def formatting_prompts_func(example):
    """Construct standard Alpaca prompts for a batch of examples.

    See https://github.com/tatsu-lab/stanford_alpaca#data-release for the layout.
    """
    output_texts = []
    mssg = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request."
    )
    for i in range(len(example["instruction"])):
        text = (
            f"{mssg}\n### Instruction:\n{example['instruction'][i]}"
            f"\n### Response: {example['response'][i]}"
        )
        output_texts.append(text)
    return output_texts
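
As an illustration (not part of the original code), here is the formatter applied to a hypothetical two-example batch, column-oriented the way Hugging Face datasets passes batched examples:

batch = {
    "instruction": ["Name a primary color.", "Add 2 and 3."],
    "response": ["Red is a primary color.", "2 + 3 = 5"],
}
for prompt in formatting_prompts_func(batch):
    print(prompt, end="\n---\n")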

def get_tokenizer_and_data_collator_and_prompt_formatting(model_name: str):
    """Return the tokenizer, completion-only data collator, and prompt formatter.

    Adapted from https://huggingface.co/docs/trl/en/sft_trainer.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        model_name, use_fast=True, padding_side="right"
    )
    tokenizer.pad_token = tokenizer.eos_token
    response_template_with_context = "\n### Response:"  # Alpaca response tag
    # Some tokenizers encode "\n###" differently when it follows other text,
    # so drop the first two tokens and match on the stable remainder.
    response_template_ids = tokenizer.encode(
        response_template_with_context, add_special_tokens=False
    )[2:]
    data_collator = DataCollatorForCompletionOnlyLM(
        response_template_ids, tokenizer=tokenizer
    )
    return tokenizer, data_collator, formatting_prompts_func
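
For context, a minimal sketch of how these three return values are typically wired into trl's SFTTrainer. The exact SFTTrainer signature varies across trl releases, and MODEL_NAME and client_trainset are placeholders (client_trainset would come from load_data below), so treat this as an assumption-laden example rather than the original training loop:

from transformers import AutoModelForCausalLM
from trl import SFTTrainer

MODEL_NAME = "openlm-research/open_llama_3b"  # placeholder model id

tokenizer, collator, fmt_func = get_tokenizer_and_data_collator_and_prompt_formatting(MODEL_NAME)
trainer = SFTTrainer(
    model=AutoModelForCausalLM.from_pretrained(MODEL_NAME),
    train_dataset=client_trainset,  # e.g. the partition returned by load_data
    formatting_func=fmt_func,       # builds the Alpaca prompt strings
    data_collator=collator,         # masks loss on everything before "### Response:"
    tokenizer=tokenizer,
)
trainer.train()

Pairing formatting_func with DataCollatorForCompletionOnlyLM is the pattern the trl documentation uses for training on completions only: the prompt tokens receive label -100, so loss is computed only on the response.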

def load_data(partition_id: int, num_partitions: int, dataset_name: str):
    """Load the training partition assigned to one client."""
    # Only initialize `FederatedDataset` once; subsequent calls reuse the cache.
    global FDS
    if FDS is None:
        partitioner = IidPartitioner(num_partitions=num_partitions)
        FDS = FederatedDataset(
            dataset=dataset_name,
            partitioners={"train": partitioner},
        )
    client_trainset = FDS.load_partition(partition_id, "train")
    # Alpaca-style datasets name the answer column "output"; the prompt
    # formatter above expects "response".
    client_trainset = client_trainset.rename_column("output", "response")
    return client_trainset
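
A hypothetical call, assuming ten clients and an Alpaca-style dataset on the Hugging Face Hub; the dataset id here is illustrative, not taken from the original:

trainset = load_data(partition_id=3, num_partitions=10, dataset_name="vicgalle/alpaca-gpt4")
print(trainset[0]["instruction"], trainset[0]["response"])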

def replace_keys(input_dict, match="-", target="_"):
    """Recursively replace match string with target string in dictionary keys."""
    new_dict = {}
    for key, value in input_dict.items():
        new_key = key.replace(match, target)
        if isinstance(value, dict):
            new_dict[new_key] = replace_keys(value, match, target)
        else:
            new_dict[new_key] = value
    return new_dict
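
For illustration, converting hyphenated config keys (common in TOML or YAML run configs) into valid Python identifiers; the config values below are made up:

raw_cfg = {"train": {"learning-rate": 5e-5, "per-device-batch-size": 16}}
print(replace_keys(raw_cfg))
# {'train': {'learning_rate': 5e-05, 'per_device_batch_size': 16}}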
