Money Tips
Home › AI › Auto-Completion Style Text Generation with GPT-2 Model

Auto-Completion Style Text Generation with GPT-2 Model

Published on

spot_img


from functools import lru_cache

from transformers import GPT2LMHeadModel, GPT2Tokenizer

import torch

 

 

class AutoComplete:
    """Prompt completion using a pretrained GPT-2 language model."""

    def __init__(self, model_name="gpt2"):
        """Load tokenizer and model and move the model to the best device.

        Args:
            model_name: Hugging Face model identifier (default "gpt2").
        """
        # Left padding so generation continues from the end of each prompt
        # (required by the batched subclass; harmless for single prompts).
        self.tokenizer = GPT2Tokenizer.from_pretrained(model_name, padding_side="left")
        self.model = GPT2LMHeadModel.from_pretrained(model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
        self.model.eval()  # inference only: disables dropout

    def get_completion(self, text, max_length=50):
        """Generate a sampled continuation of ``text``.

        Args:
            text: Prompt string to complete.
            max_length: Total sequence length (prompt + completion) in tokens.

        Returns:
            The generated continuation only; the prompt is not included.
        """
        inputs = self.tokenizer(text, add_special_tokens=False, return_tensors="pt")
        input_ids = inputs["input_ids"].to(self.device)
        attn_masks = inputs["attention_mask"].to(self.device)

        # Sampled decoding; no_grad avoids building an autograd graph.
        with torch.no_grad():
            outputs = self.model.generate(
                input_ids,
                attention_mask=attn_masks,
                max_length=max_length,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id,
                do_sample=True,
                temperature=0.7,
            )

        # Strip the prompt at the token level rather than slicing the decoded
        # string by len(text): decoding can normalize whitespace, so character
        # offsets into the decoded output are not reliable.
        new_tokens = outputs[0][input_ids.shape[1]:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True)

 

 

class CachedAutoComplete(AutoComplete):
    """AutoComplete with a per-instance LRU cache over completions.

    NOTE: generation samples (``do_sample=True``), so caching pins one random
    completion per (text, max_length) pair for the lifetime of this instance.
    """

    def __init__(self, cache_size=1000, **kwargs):
        """Initialize the model and wrap ``get_completion`` with an LRU cache.

        Args:
            cache_size: Maximum number of cached (text, max_length) entries.
            **kwargs: Forwarded to ``AutoComplete.__init__`` (e.g. model_name).
        """
        super().__init__(**kwargs)
        # Wrapping the *bound* method keeps the cache per-instance, avoiding
        # the class-level lru_cache-on-a-method pitfall of sharing/leaking
        # entries across instances.
        self.get_completion = lru_cache(maxsize=cache_size)(
            self.get_completion
        )

 

 

class OptimizedAutoComplete(CachedAutoComplete):
    """Cached auto-completion with GPU and batching optimizations."""

    def __init__(self, **kwargs):
        """Initialize, then apply inference-time optimizations.

        Args:
            **kwargs: Forwarded to ``CachedAutoComplete.__init__``.
        """
        super().__init__(**kwargs)
        # GPT-2 ships without a pad token; reuse EOS so batch padding works.
        self.tokenizer.pad_token = self.tokenizer.eos_token

        if self.device == "cuda":
            self.model = self.model.half()  # FP16 halves GPU memory use

        self.model.eval()  # redundant with the base class, but harmless

    def preprocess_batch(self, texts):
        """Tokenize several prompts into one padded batch on the device.

        Args:
            texts: List of prompt strings.

        Returns:
            A ``BatchEncoding`` with left-padded ``input_ids``/``attention_mask``
            moved to ``self.device``.
        """
        inputs = self.tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        return inputs.to(self.device)

    def generate_batch(self, texts, max_length=50):
        """Generate one sampled completion per prompt in a single batch.

        Args:
            texts: List of prompt strings.
            max_length: Total sequence length (padded prompt + completion).

        Returns:
            List of continuations, one per prompt, prompts excluded.
        """
        inputs = self.preprocess_batch(texts)

        with torch.no_grad():
            outputs = self.model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=max_length,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id,
                do_sample=True,
                temperature=0.7,
            )

        # Strip prompts at the token level. Inputs are left-padded, so every
        # prompt ends exactly at column ``prompt_len``; slicing the decoded
        # string by len(text) (the old approach) is wrong whenever padding,
        # truncation, or decode-time normalization changes the prefix.
        prompt_len = inputs["input_ids"].shape[1]
        new_tokens = outputs[:, prompt_len:]
        return self.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)

 

# Example: optimized batch completion. Guarded so importing this module does
# not trigger a model download and GPU generation as a side effect.
if __name__ == "__main__":
    optimized_complete = OptimizedAutoComplete()
    prompts = [
        "Machine learning is",
        "Deep neural networks can",
        "The training process involves",
    ]
    completions = optimized_complete.generate_batch(prompts)
    for prompt, completion in zip(prompts, completions):
        print(f"\nInput: {prompt}")
        print(f"Completion: {completion}")



Source link

Latest articles

How Healthcare Providers Can Put Their Analytics to Work

Today, almost every healthcare institution...

The Favorite Online Slot Site for Indonesian Players

Jandaslot In the vibrant landscape of online gambling, Jandaslot has quickly become a...

PoSciDonDAO Approves First Research Funding Proposal from Rare Compute

PoSciDonDAO, a decentralized organization dedicated to advancing scientific research, has approved its first-ever...

OpenAI debuts Codex CLI, an open source coding tool for terminals

In a bid to inject AI into more of the programming process, OpenAI...

More like this

How Healthcare Providers Can Put Their Analytics to Work

Today, almost every healthcare institution...

The Favorite Slot Online Site Indonesian Players

Jandaslot In the vibrant landscape of online gambling, Jandaslot has quickly become a...

PoSciDonDAO Approves First Research Funding Proposal from Rare Compute

PoSciDonDAO, a decentralized organization dedicated to advancing scientific research, has approved its first-ever...