Python: another DistilBERT fill-mask example, with 3 masks and multiple variations per mask
import torch

# https://pythonprogrammingsnippets.tumblr.com


def replace_mask_with_top_n(the_sentence, model, tokenizer, num_results=10):
    """Fill the first mask in a sentence with each of the model's top candidates.

    Only the first mask token is replaced per call; any further masks are left
    in place so the function can be called again on each returned sentence.

    Args:
        the_sentence: Text containing zero or more mask tokens.
        model: Callable masked-LM; ``model(input_ids).logits`` must hold the
            per-position vocabulary scores.
        tokenizer: Tokenizer providing ``encode``, ``decode``, ``mask_token``
            and ``mask_token_id``.
        num_results: Maximum number of candidate sentences to return.

    Returns:
        A list of sentences, highest-scoring candidate first. The list is
        empty when the encoded sentence contains no mask token.
    """
    input_ids = tokenizer.encode(the_sentence, return_tensors="pt")
    mask_positions = torch.where(input_ids == tokenizer.mask_token_id)[1]
    if mask_positions.numel() == 0:
        # Nothing to fill; indexing the first mask below would raise.
        return []

    # Pure inference: skip building the autograd graph.
    with torch.no_grad():
        token_logits = model(input_ids).logits

    first_mask_logits = token_logits[0, mask_positions[0], :]
    # topk raises if k exceeds the vocabulary dimension, so clamp it.
    k = min(num_results, first_mask_logits.shape[-1])
    top_token_ids = torch.topk(first_mask_logits, k).indices.tolist()

    mask_token = tokenizer.mask_token
    results = []
    for token_id in top_token_ids:
        word = tokenizer.decode(token_id).replace(" ", "")
        # count=1: substitute only the first mask, matching the logits used.
        results.append(the_sentence.replace(mask_token, word, 1))
    return results


def main():
    """Expand a three-mask sentence level by level and print every variation."""
    # Imported here so the helper above can be imported and exercised without
    # transformers being installed or a pretrained model being loaded.
    from transformers import DistilBertForMaskedLM, DistilBertTokenizer

    model_name_or_path = "distilbert-base-cased"
    tokenizer = DistilBertTokenizer.from_pretrained(model_name_or_path)
    model = DistilBertForMaskedLM.from_pretrained(model_name_or_path)

    input_sentence = "She [MASK] her [MASK] and then [MASK]."

    # 10 candidates for the first mask, then 3 for each of the remaining two.
    results = replace_mask_with_top_n(
        input_sentence, model, tokenizer, num_results=10
    )
    for r in results:
        print("::", r)
        sub_r = replace_mask_with_top_n(r, model, tokenizer, num_results=3)
        for s in sub_r:
            print(" ::", s)
            sub_s = replace_mask_with_top_n(s, model, tokenizer, num_results=3)
            for g in sub_s:
                print(" ::", g)


if __name__ == "__main__":
    main()
output:
# She [MASK] her [MASK] and then [MASK].
:: She lowered her [MASK] and then [MASK].
 :: She lowered her voice and then [MASK].
 :: She lowered her voice and then laughed.
 :: She lowered her voice and then sighed.
 :: She lowered her voice and then smiled.
 :: She lowered her head and then [MASK].
 :: She lowered her head and then sighed.
 :: She lowered her head and then smiled.
 :: She lowered her head and then laughed.










