import pandas as pd
import numpy as np

# Build a 5x3 frame of random samples labelled a/c/e/f/h, then reindex it
# onto a..h so that the labels with no source row (b, d, g) become NaN rows.
existing_rows = ['a', 'c', 'e', 'f', 'h']
column_names = ['one', 'two', 'three']
df = pd.DataFrame(np.random.randn(5, 3), index=existing_rows, columns=column_names)
df = df.reindex(list('abcdefgh'))
# Bare expression: rendered as the cell result in a notebook, no-op in a script.
df

import pandas as pd
import numpy as np

# Five rows of random data re-labelled onto a..h; b, d and g have no source
# row, so every column is NaN for those labels.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=list('acefh'),
    columns=['one', 'two', 'three'],
).reindex(list('abcdefgh'))

# Boolean mask over column 'one': True where the value is missing.
missing_mask = df['one'].isna()
print(missing_mask)

a    False
b     True
c    False
d     True
e    False
f    False
g     True
h    False
Name: one, dtype: bool

import pandas as pd
import numpy as np

# Random 5x3 frame expanded to labels a..h; the added labels hold NaN.
source_labels = ['a', 'c', 'e', 'f', 'h']
target_labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
df = pd.DataFrame(np.random.randn(5, 3),
                  index=source_labels,
                  columns=['one', 'two', 'three'])
df = df.reindex(target_labels)

# Inverse of the missing-value mask: True where column 'one' holds a value.
present_mask = df['one'].notna()
print(present_mask)

a     True
b    False
c     True
d    False
e     True
f     True
g    False
h     True
Name: one, dtype: bool

import pandas as pd
import numpy as np

# Random data for five labels, then reindexed so b, d and g are NaN rows.
full_index = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
sampled = pd.DataFrame(np.random.randn(5, 3),
                       index=['a', 'c', 'e', 'f', 'h'],
                       columns=['one', 'two', 'three'])
df = sampled.reindex(full_index)

# sum() skips NaN by default, so only the five real values contribute.
column_total = df['one'].sum()
print(column_total)

0.6235585460015739

import pandas as pd
import numpy as np

# A frame created from labels only, with no data: every cell is NaN.
row_labels = list(range(6))
df = pd.DataFrame(columns=['one', 'two'], index=row_labels)
# Summing a column that contains nothing but NaN yields 0.
print(df['one'].sum())

0

import pandas as pd
import numpy as np
# 3x3 random frame labelled a/c/e, reindexed to a/b/c: row 'e' is dropped
# and the new label 'b' becomes an all-NaN row.
df = pd.DataFrame(
    np.random.randn(3, 3),
    index=list('ace'),
    columns=['one', 'two', 'three'],
).reindex(list('abc'))
print(df)

# Replace every NaN with the scalar 0; df itself is left unchanged.
print("NaN replaced with '0':")
zero_filled = df.fillna(0)
print(zero_filled)

        one       two     three
a -0.540132  0.929588 -0.521647
b       NaN       NaN       NaN
c -1.377040 -1.621413  0.494371
NaN replaced with '0':
        one       two     three
a -0.540132  0.929588 -0.521647
b  0.000000  0.000000  0.000000
c -1.377040 -1.621413  0.494371

import pandas as pd
import numpy as np

# Random 5x3 frame labelled a/c/e/f/h, reindexed onto a..h so that rows
# b, d and g are entirely NaN.
df = pd.DataFrame(np.random.randn(5, 3),
                  index=['a', 'c', 'e', 'f', 'h'],
                  columns=['one', 'two', 'three'])
df = df.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])

# Forward fill: each missing row takes the values of the nearest valid row
# above it. DataFrame.ffill() is the supported spelling; the older
# fillna(method='pad') form is deprecated and emits a FutureWarning.
forward_filled = df.ffill()
print(forward_filled)

        one       two     three
a -0.963093 -1.214628  0.104516
b -0.963093 -1.214628  0.104516
c  0.308843 -1.109464  0.394322
d  0.308843 -1.109464  0.394322
e  1.871751 -0.628859  0.510016
f  0.881671  1.114474 -0.341950
g  0.881671  1.114474 -0.341950
h -0.866913 -0.393091 -0.218434

/tmp/ipykernel_2645/748388134.py:9: FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.
  print (df.fillna(method='pad'))

import pandas as pd
import numpy as np

# Random 5x3 frame labelled a/c/e/f/h, reindexed onto a..h so that rows
# b, d and g are entirely NaN.
df = pd.DataFrame(np.random.randn(5, 3),
                  index=['a', 'c', 'e', 'f', 'h'],
                  columns=['one', 'two', 'three'])

df = df.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])

# Backward fill: each missing row takes the values of the nearest valid row
# below it. DataFrame.bfill() is the supported spelling; the older
# fillna(method='backfill') form is deprecated and emits a FutureWarning.
backward_filled = df.bfill()
print(backward_filled)

        one       two     three
a -1.130716 -0.851352 -0.471409
b  0.470916 -0.353104  0.471979
c  0.470916 -0.353104  0.471979
d  0.968244 -0.220899  1.246868
e  0.968244 -0.220899  1.246868
f  1.557570  2.105571 -0.657172
g -0.139376 -0.955506  0.197256
h -0.139376 -0.955506  0.197256

/tmp/ipykernel_2645/3179451599.py:9: FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.
  print (df.fillna(method='backfill'))

import pandas as pd
import numpy as np

# Random data for a/c/e/f/h, expanded to a..h; b, d and g are NaN rows.
kept_labels = ['a', 'c', 'e', 'f', 'h']
df = pd.DataFrame(np.random.randn(5, 3),
                  index=kept_labels,
                  columns=['one', 'two', 'three'])
df = df.reindex(list('abcdefgh'))

# Drop along the row axis (the default): any row containing NaN is removed.
rows_without_nan = df.dropna(axis=0)
print(rows_without_nan)

        one       two     three
a  0.102280 -0.134644 -0.065381
c  1.849913 -0.258381 -0.449780
e -1.427968  0.994814  0.067866
f  0.052859  0.782432  0.660821
h  1.404502  0.357647  0.441647

import pandas as pd
import numpy as np

# Random data for a/c/e/f/h, expanded to a..h; b, d and g are NaN rows.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=list('acefh'),
    columns=['one', 'two', 'three'],
).reindex(list('abcdefgh'))

# Drop along the column axis: every column contains a NaN (rows b, d, g),
# so no columns survive and the result is an empty frame with 8 index labels.
columns_without_nan = df.dropna(axis='columns')
print(columns_without_nan)

Empty DataFrame
Columns: []
Index: [a, b, c, d, e, f, g, h]

import pandas as pd
import numpy as np
# Two integer columns; 2000 (in 'one') and 1000 (in 'two') are the values
# that get replaced.
df = pd.DataFrame({
    'one': [10, 20, 30, 40, 50, 2000],
    'two': [1000, 0, 30, 40, 50, 60],
})
# Mapping of old value -> new value, applied across every column.
value_map = {1000: 10, 2000: 60}
print(df.replace(value_map))

   one  two
0   10   10
1   20    0
2   30   30
3   40   40
4   50   50
5   60   60

import pandas as pd
import numpy as np
# Same replacement as a generic value substitution: build the frame from a
# column dict, then swap 1000 -> 10 and 2000 -> 60 wherever they occur.
column_data = {
    'one': [10, 20, 30, 40, 50, 2000],
    'two': [1000, 0, 30, 40, 50, 60],
}
df = pd.DataFrame(column_data)
replaced = df.replace(to_replace={1000: 10, 2000: 60})
print(replaced)

   one  two
0   10   10
1   20    0
2   30   30
3   40   40
4   50   50
5   60   60

	one	two	three
a	-0.046133	0.542130	0.244085
b	NaN	NaN	NaN
c	-0.544291	0.908537	-1.915221
d	NaN	NaN	NaN
e	0.219617	-0.144276	0.362802
f	1.218952	0.371396	1.206544
g	NaN	NaN	NaN
h	0.043141	-1.546063	0.762845

方法	动作
`pad/ffill`	向前填充(用前一个有效值填充缺失值)
`bfill/backfill`	向后填充(用后一个有效值填充缺失值)

检查缺失值

缺少数据的计算

清理/填充缺少数据

用标量值替换NaN

向前和向后填充 `NA`

删除缺失值

替换丢失(或)通用值

① 阅读使用手册

② 注册用户账号

介绍

平台内核

注意事项

检查缺失值

缺少数据的计算

清理/填充缺少数据

用标量值替换NaN

向前和向后填充 NA

删除缺失值

替换丢失(或)通用值

① 阅读使用手册

② 注册用户账号

③ 登陆

Python基础

Python进阶

标准类库

专题工具

图像处理

科学计算

自然语言

开源GIS

R与Julia

介绍

平台内核

注意事项

向前和向后填充 `NA`