Pandas: map, applymap, apply
map: on Series only; on one element at a time; takes dicts, series, or callable;
applymap: on DataFrame only; on one element at a time; takes callables only; (deprecated since pandas 2.1 in favor of DataFrame.map)
apply: both Series & DataFrame; element-wise on a Series, on entire rows or columns at a time on a DataFrame; takes callables only;
# map with a dict: each value of column A is looked up in the mapping.
df['A'].map({1:'a', 2:'b', 3:'c'})
# applymap with a callable: str.strip is called on every cell of columns A, B and C.
df[['A','B','C']].applymap(str.strip)
# apply with a callable: nltk.sent_tokenize is called on each value of the column.
df['sentences'].apply(nltk.sent_tokenize)
#extract year from year-month: extract 1997 from 1997-12
def trunc_str(year):
    """Return the first run of four digits found in *year*, or None.

    Example: ``trunc_str('1997-12')`` returns ``'1997'``.

    Non-string values (such as the NaN pandas uses for missing cells)
    yield None instead of raising, so the function can be handed
    directly to ``Series.apply``.
    """
    if not isinstance(year, str):
        return None
    # re.search returns a match object (or None). re.findall returns a
    # plain list, which has no .group() method.
    match = re.search(r'\d{4}', year)
    return match.group() if match else None
# Three alternative ways to fill 'sentence' from 'sentences':
# 1) wrap trunc_str in a lambda (the lambda only forwards its argument);
df['sentence'] = df['sentences'].apply(lambda x: trunc_str(x))
# 2) pass trunc_str directly -- the same call without the extra wrapper;
df['sentence'] = df['sentences'].apply(trunc_str)
# 3) use the vectorized .str.extract; this pattern captures four digits only
#    when they are followed by '-' and two more digits.
df['sentence'] = df['sentences'].str.extract(r'(\d{4})-\d{2}')
df.replace(r'-\d+', '', regex=True, inplace=True)
# Remove every dash (-) followed by a number, in place, e.g. '1997-12' -> '1997'.
# The backslash matters: '-d' would only match a literal "-d", not a dash and digits.
df.replace('[nN]ew','New_',regex=True)
# Replace every 'new' or 'New' with 'New_'; without inplace=True this returns a new DataFrame and leaves df unchanged.
# Replace every '@' in each Email value with 'at'.
df['Email'].apply(lambda x: re.sub(r'@', 'at', x))
# Split s on commas and strip surrounding whitespace from each piece.
[x.strip() for x in s.split(',')]
# NOTE(review): DataFrame.apply defaults to axis=0 and passes one column (a Series)
# at a time, so x.C and x.B would be index-label lookups here, not columns --
# confirm intent; df.C.max() - df.B.min() may be what was meant.
df.apply(lambda x: x.C.max() - x.B.min())
# Per group of A: (max of C) - (min of B). groupby is a method and must be
# called with parentheses; subscripting it (df.groupby['A']) raises TypeError.
df.groupby('A')[['B','C']].apply(lambda x: x.C.max() - x.B.min())
https://stackoverflow.com/questions/25292838/applying-regex-to-a-pandas-dataframe