Answer:
import re
def split_ngrams(text, n):
    """Split *text* into word-level N-grams padded with boundary markers.

    The text is lowercased and wrapped in the sentinel tokens ``"START"``
    and ``"END"``. Every character that is neither a word character nor
    whitespace is then removed -- including word-internal punctuation, so
    ``"It's"`` becomes ``"its"`` -- and the result is split on whitespace.
    Each run of ``n`` consecutive tokens is returned as a tuple.

    Args:
        text: The sentence to split.
        n: Size of each N-gram; must be at least 1.

    Returns:
        A list of ``n``-tuples of tokens in sentence order. The list is
        empty when ``n`` exceeds the number of tokens (markers included).

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    # Without this guard n == 0 yields a list of empty tuples and a negative
    # n yields arbitrary slices, neither of which is a meaningful N-gram.
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    # Only the input is lowercased; the markers stay upper-case so they
    # cannot collide with an ordinary (lowercased) word from the text.
    sentence = "START " + text.lower() + " END"
    # Drop everything that is not a word character or whitespace.
    sentence = re.sub(r"[^\w\s]+", "", sentence)
    words = sentence.split()
    # Slide a window of width n across the token list.
    return [tuple(words[i:i + n]) for i in range(len(words) - n + 1)]
# Demo: build the bigrams of a short sentence and print them.
text, n = "It's raining cats and dogs!", 2
print(split_ngrams(text, n))
# Expected output:
# [('START', 'its'), ('its', 'raining'), ('raining', 'cats'),
#  ('cats', 'and'), ('and', 'dogs'), ('dogs', 'END')]