Commonly used regex for data preprocessing

Add a space before each punctuation mark if it is not already preceded by one.

import re

# Example: normalise the punctuation and whitespace of a single sentence.
ex_str = 'This house is very old.'
# Surround each of ? . ! , with spaces so it is separated from adjacent words.
ex_str = re.sub(r"([?.!,])", r" \1 ", ex_str)
# Collapse each run of spaces into a single space. The character class also
# contains the double-quote character, so runs of quotes are collapsed too.
ex_str = re.sub(r'[" "]+', " ", ex_str)
# Replace every run of characters other than ASCII letters and ? . ! with one
# space. Note that "," is not in the kept set, so commas are removed here.
# Fixed: the arguments were separated by "." instead of ",", and the input was
# the undefined name `w` rather than `ex_str`.
ex_str = re.sub(r"[^a-zA-Z?.!]+", " ", ex_str)
# Drop the leading/trailing space introduced by the substitutions above.
ex_str = ex_str.strip()
print(ex_str)  # This house is very old .
  1. Identifiers
    1. `*`
    2. `.`
    3. `+`
    4. `?`
    5. `[]`
    6. `\1`
    7. `.*`
    8. `.+`
    9. `.?`
    10.