import numpy as np
# Seeded generator: every run of the notebook reproduces the same draws.
rand = np.random.RandomState(seed=42)

# Ten random integers taken from the half-open range [0, 100).
x = rand.randint(0, 100, size=10)
print(x)

[51 92 14 71 60 20 82 86 74 74]

# Pick three separate elements, one scalar index at a time.
[x[3], x[7], x[2]]

[71, 86, 14]

# Pass a list of indices to fetch several elements in a single indexing operation.
ind = [3, 7, 4]
x[ind]

array([71, 86, 60])

# The result takes the shape of the index array, not of the array being indexed.
ind = np.array([[3, 7],
                [4, 5]]) # the index array is 2x2, so the result is also a 2x2 array
x[ind]

array([[71, 86],
       [60, 20]])

# 3x4 grid holding 0..11, used by the multi-dimensional indexing examples.
X = np.arange(12).reshape((3, 4))
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

# Row and column indices are paired element-wise: (0, 2), (1, 1), (2, 3).
row = np.array([0, 1, 2])
col = np.array([2, 1, 3])
X[row, col]

array([ 2,  5, 11])

# A column vector of row indices broadcasts against the column indices, giving a 3x3 result.
X[row[:, np.newaxis], col]

array([[ 2,  1,  3],
       [ 6,  5,  7],
       [10,  9, 11]])

# The same broadcasting applies to ordinary arithmetic: shape (3, 1) with shape (3,) gives (3, 3).
row[:, np.newaxis] * col

array([[0, 0, 0],
       [2, 1, 3],
       [4, 2, 6]])

# Show X again for reference before the combined-indexing examples.
print(X)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

# A scalar row index combined with a list of column indices.
X[2, [2, 0, 1]]

array([10,  8,  9])

# A slice of rows combined with a list of column indices.
X[1:, [2, 0, 1]]

array([[ 6,  4,  5],
       [10,  8,  9]])

# Boolean mask keeping columns 0 and 2, combined with a column vector of row indices.
mask = np.array([1, 0, 1, 0], dtype=bool)
X[row[:, np.newaxis], mask]

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10]])

# Draw 100 points from a 2-D normal distribution with the given mean and covariance.
# Note that X is rebound here, so the 3x4 grid used above is no longer available.
mean = [0, 0]
cov = [[1, 2],
       [2, 5]]
X = rand.multivariate_normal(mean, cov, 100)
X.shape

(100, 2)

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn; seaborn.set()  # apply seaborn's default plot styling

# Scatter the first coordinate against the second.
plt.scatter(X[:, 0], X[:, 1]);

# Choose 20 distinct row indices (replace=False means no index is repeated).
# NOTE(review): this draws from NumPy's global generator, not the seeded `rand`
# created at the top, so the chosen indices are not tied to that seed.
indices = np.random.choice(X.shape[0], 20, replace=False)
indices

array([66, 38, 68, 88, 94, 50, 73, 69, 95, 31, 89, 39, 20, 85, 34, 49, 48,
       96, 29, 44])

selection = X[indices]  # fancy indexing with the chosen row indices
selection.shape

(20, 2)

# Plot all points faintly, then overlay the selected ones as large hollow markers.
plt.scatter(X[:, 0], X[:, 1], alpha=0.3)
plt.scatter(selection[:, 0], selection[:, 1],
            facecolor='none', s=200);

# Assigning through an index array sets every listed position.
x = np.arange(10)
i = np.array([2, 1, 8, 4])
x[i] = 99
print(x)

[ 0 99 99  3 99  5  6  7 99  9]

# In-place operators also work through an index array.
x[i] -= 10
print(x)

[ 0 89 89  3 89  5  6  7 89  9]

x = np.zeros(10)
# Index 0 is listed twice: 4 is written first and then overwritten by 6.
x[[0, 0]] = [4, 6]
print(x)

[6. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

# Repeated indices do not accumulate here: x[i] + 1 is evaluated once and then
# assigned back, so each listed position only increases by 1.
i = [2, 3, 3, 4, 4, 4]
x[i] += 1
x

array([6., 0., 1., 1., 1., 0., 0., 0., 0., 0.])

x = np.zeros(10)
# np.add.at applies the addition once per occurrence of each index, so repeats accumulate.
np.add.at(x, i, 1)
print(x)

[0. 0. 1. 2. 3. 0. 0. 0. 0. 0.]

np.random.seed(42)
x = np.random.randn(100) # 100 samples from the standard normal distribution

# Build the bins by hand: 20 evenly spaced points from -5 to 5
bins = np.linspace(-5, 5, 20)
counts = np.zeros_like(bins) # counts holds the number of x values that fall in each bin

# Use searchsorted to find, for each element of x, the index of its bin in bins
# NOTE(review): a value greater than bins[-1] would yield index len(bins), which is
# out of range for counts — confirm the data always stays inside the bin range.
i = np.searchsorted(bins, x)

# Use add.at to add 1 to the count of each of those bins
np.add.at(counts, i, 1)

# Plot the result
plt.plot(bins, counts, ds='steps');

# Matplotlib's built-in histogram of the same data and bins, for comparison.
plt.hist(x, bins, histtype='step');

# Time NumPy's histogram against the manual searchsorted + add.at approach on the 100-sample x.
print("NumPy routine:")
%timeit counts, edges = np.histogram(x, bins)

print("Custom routine:")
# NOTE(review): add.at updates counts in place, so every timing iteration keeps
# adding to the same array; only the timing is meaningful here, not counts.
%timeit np.add.at(counts, np.searchsorted(bins, x), 1)

NumPy routine:
22.1 µs ± 381 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
Custom routine:
16 µs ± 609 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)

# Repeat the comparison with one million samples.
x = np.random.randn(1000000)
print("NumPy routine:")
%timeit counts, edges = np.histogram(x, bins)

print("Custom routine:")
# NOTE(review): a sample above bins[-1] would make searchsorted return len(bins),
# which is not a valid index into counts — confirm whether this needs guarding.
%timeit np.add.at(counts, np.searchsorted(bins, x), 1)

NumPy routine:
80 ms ± 1.23 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Custom routine:
121 ms ± 342 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

初探高级索引

组合索引

例子：选择随机点

使用高级索引修改数据

例子：数据分组

① 阅读使用手册

② 注册用户账号

介绍

平台内核

注意事项

初探高级索引

组合索引

例子：选择随机点

使用高级索引修改数据

例子：数据分组

① 阅读使用手册

② 注册用户账号

③ 登录

Python基础

Python进阶

标准类库

专题工具

图像处理

科学计算

自然语言

开源GIS

R与Julia

介绍

平台内核

注意事项