Fork me on GitHub

100 numpy exercises 总结篇

查看numpy的版本和配置

1
2
3
4
import numpy as np
np.__version__ # 查看版本

np.show_config() # 查看配置

np.zeros

默认$dtype$是浮点型的0

1
2
np.zeros(10)     # 一维
np.zeros((3 , 3)) # 多维

np.ndarray.itemsize

查看$ndarray$中一个元素的内存

1
2
3
a = np.random.randn(3 , 4)
# 输出:'float64' , 8 , 8 * 12 = 96(ndarray所占的内存)
print(a.dtype , a.itemsize , a.itemsize * a.size)

reverse一个vector

1
np.arange(10)[::-1] # a slice with step -1 reverses the vector (this is a reversal, not a transpose)

寻找ndarray的非0元素的下标

使用np.nonzero()

1
2
3
4
5
6
7
8
9
10
11
12
a = np.arange(12).reshape(3 , 4)
a[1][1] = 0
'''
array([[ 0, 1, 2, 3],
[ 4, 0, 6, 7],
[ 8, 9, 10, 11]])
'''
np.nonzero(a) # 由于是2维的,所以len = 2,返回值是tuple
'''
(array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2], dtype=int64),
array([1, 2, 3, 0, 2, 3, 0, 1, 2, 3], dtype=int64))
'''

创建一个单位矩阵

np.eye

1
2
3
4
5
6
np.eye(3 , 3)   # dtype 为浮点型
'''
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
'''

np.random.random()

Return random floats in the half-open interval $[0.0, 1.0)$.

1
np.random.random((3 , 3))

寻找ndarray的最大值和最小值

1
2
3
4
a = np.arange(12).reshape(3 , 4)
a.max() # 寻找最大值,11
a.max(axis = 0) # 输出:array([ 8, 9, 10, 11])
a.min()

使一个array的周围都是0

1
2
3
4
5
6
7
8
9
10
11
a = np.ones((5 , 5))
a[: , [0 , -1]] = 0
a[[0 , -1] , :] = 0
a
'''
array([[0., 0., 0., 0., 0.],
[0., 1., 1., 1., 0.],
[0., 1., 1., 1., 0.],
[0., 1., 1., 1., 0.],
[0., 0., 0., 0., 0.]])
'''

np.nan和np.inf的一些运算

1
2
3
4
5
6
7
8
0 * np.nan # nan
np.nan == np.nan # False
np.inf > np.nan , np.inf < np.nan , np.inf == np.nan # (False , False , False)
np.nan - np.nan # nan
np.inf - np.inf # nan
np.nan in set([1 , 2 , np.nan]) # True
type(np.nan) # float
0.3 == 0.1 * 3 # False

np.diag(v , k)

v: array_like

按照k和v来判断矩阵的大小(是方阵)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
np.diag(1 + np.arange(4) , k = 1)  # 对角线上一格  5 * 5
'''
array([[0, 1, 0, 0, 0],
[0, 0, 2, 0, 0],
[0, 0, 0, 3, 0],
[0, 0, 0, 0, 4],
[0, 0, 0, 0, 0]])
'''
np.diag(1 + np.arange(4) , k = -1) # 对角线下一格 5 * 5
'''
array([[0, 0, 0, 0, 0],
[1, 0, 0, 0, 0],
[0, 2, 0, 0, 0],
[0, 0, 3, 0, 0],
[0, 0, 0, 4, 0]])
'''
np.diag(1 + np.arange(4) , k = 0) # 在对角线上 4 * 4
'''
array([[1, 0, 0, 0],
[0, 2, 0, 0],
[0, 0, 3, 0],
[0, 0, 0, 4]])
'''

创建一个checkerboard pattern的矩阵

方法1:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
a = np.ones((8 , 8) , dtype = 'int')
a[::2 , ::2] = 0
a[1::2 , 1::2] = 0
a
'''
array([[0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0]])
'''

方法2:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
np.tile(np.array([[0 , 1] , [1 , 0]]) , (4 , 4))
'''
[[0 , 1]
[1 , 0]] 的块在axis = 0 和 axis = 1 上都重复4次
'''

'''
array([[0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0]])
'''

给定一个array的维度,如何查看第i个元素的下标是多少

np.unravel_index

1
2
np.unravel_index(99 , (6 , 7 , 8))  # multi-dimensional index of the element with flat index 99 in an array of shape (6 , 7 , 8)
# (1, 5, 3)

创建一个dtype来描述(RGBA)

1
dtype1 = np.dtype([('r' , np.ubyte) , ('g' , np.ubyte) , ('b' , np.ubyte) , ('a' , np.ubyte)])   # 创建好类型
1
2
3
4
5
6
a = np.array([(1 , 2 , 3 , 4) , (5 , 6 , 7 , 8)] , dtype = dtype1)
a
'''
array([(1, 2, 3, 4), (5, 6, 7, 8)],
dtype=[('r', 'u1'), ('g', 'u1'), ('b', 'u1'), ('a', 'u1')])
'''

将array中大于3且小于8的数取反

用与运算

1
2
a = np.arange(11)
# The boolean mask picks the elements strictly between 3 and 8;
# the in-place multiply negates exactly those elements.
a[(a > 3) & (a < 8)] *= -1

求两个array的交集

np.intersect1d:求交集

1
2
3
4
5
6
7
a = np.array([1 , 2 , 4 , 3 , 5])
b = np.array([4 , 3 , 9 , 8])
np.intersect1d(a , b)

'''
array([3, 4])
'''

获取当前的今天和明天的日期

1
2
3
4
5
6
7
8
9
10
a = np.datetime64('today')  # 获取今天的日期
a
'''
numpy.datetime64('2020-10-2')
'''
b = a + np.timedelta64(1 , 'D')
b
'''
numpy.datetime64('2020-10-3')
'''
1
2
3
4
5
6
7
8
9
import pandas as pd  # pandas must be imported before pd.Timestamp can be used

a = pd.Timestamp(a)  # wrap the date in a pandas Timestamp to get attribute access such as .year
a
'''
Timestamp('2020-10-02 00:00:00')
'''
a.year # 获取年
'''
2020
'''

获取2016年7月的全部日期(以天为单位)

修改dtype使得字符串变为时间

1
2
3
4
5
6
7
8
9
10
11
np.arange('2016-07' , '2016-08' , dtype = 'datetime64[D]') # 换成Y就是年为单位,换成M就是月为单位
'''
array(['2016-07-01', '2016-07-02', '2016-07-03', '2016-07-04',
'2016-07-05', '2016-07-06', '2016-07-07', '2016-07-08',
'2016-07-09', '2016-07-10', '2016-07-11', '2016-07-12',
'2016-07-13', '2016-07-14', '2016-07-15', '2016-07-16',
'2016-07-17', '2016-07-18', '2016-07-19', '2016-07-20',
'2016-07-21', '2016-07-22', '2016-07-23', '2016-07-24',
'2016-07-25', '2016-07-26', '2016-07-27', '2016-07-28',
'2016-07-29', '2016-07-30', '2016-07-31'], dtype='datetime64[D]')
'''

提取正数的整数部分 (4 种方法)

有关np.where的用法,参考:https://www.cnblogs.com/massquantity/p/8908859.html

1
2
3
4
5
6
a = np.random.uniform(-5 , 10 , 10)
a
'''
array([-1.16420344, 3.95631045, 7.34675534, 8.03188626, 9.04815034,
6.5529346 , -1.46680238, 7.35797919, 3.9921186 , -3.76588798])
'''
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# 方法1:
np.where(a > 0 , a - a%1 , a)

# 方法2:
np.where(a > 0 , a // 1 , a)

# 方法3:
np.where(a > 0 , np.floor(a) , a)

# 方法4:
np.where(a > 0 , a.astype(int) , a)

'''
array([-1.16420344, 3. , 7. , 8. , 9. ,
6. , -1.46680238, 7. , 3. , -3.76588798])
'''

np.trunc

np.trunc: 此函数返回输入数组各元素的截断值。输入值$x$的截断值$t$是去掉小数部分后得到的整数,即介于0和$x$之间且最接近$x$的整数(向0取整)。

1
2
3
4
5
6
7
8
9
10
11
a = np.array([-4.5375666 ,  1.54911477, -3.74455161, -2.41802724,  3.17252821, 8.00279135, -4.8008351 ,  5.53268438, -2.19521765,  8.72607842])
np.trunc(a)
'''
array([-4., 1., -3., -2., 3., 8., -4., 5., -2., 8.])
'''

# 对比np.floor,是取更小的元素
np.floor(a)
'''
array([-5., 1., -4., -3., 3., 8., -5., 5., -3., 8.])
'''

利用生成器方法来生成一个array

np.fromiter: Create a new 1-dimensional array from an iterable object.

1
2
3
4
5
6
7
8
def func(n):
    """Yield the integers 0, 1, ..., n - 1 one at a time."""
    for i in range(n):
        yield i # the yield statement makes func a generator function

# np.fromiter consumes the iterable and builds a 1-D array of the given dtype.
np.fromiter(func(10) , dtype = float)
'''
array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
'''

np.add.reduce(ndarray , axis)

对于小数组,比np.sum更快(np.sum内部最终也是调用np.add.reduce,直接调用可以省去额外的参数处理开销)

1
2
3
4
5
6
7
8
9
10
11
a = np.array([1 , 2 , 3 , 4]) # array lives in the np namespace; a bare array(...) is undefined here
np.add.reduce(a)
'''
10
'''

a = np.arange(12).reshape(3 , 4)
np.add.reduce(a , axis = 0)
'''
array([12, 15, 18, 21])
'''

使一个array 只读(read-only)

1
2
a = np.array([1 , 2 , 3])
a.flags.writeable = False #写权限为 False

将直角坐标变为极坐标

1
2
3
4
5
6
a = np.random.randn(10 , 2)  # 直角坐标
x = a[: , 0]
y = a[: , 1]

R = np.sqrt(x ** 2 + y ** 2) # 求长度
T = np.arctan2(y , x) # 弧度制 求角度

np.arctan2(y , x) 求出来的范围是$[-\pi , \pi]$

而np.arctan(y / x) 的范围是$[-\pi/2 , \pi/2]$

将矩阵的每列的最小值置1 (特殊的切片方式)

1
2
3
4
5
6
7
8
9
10
a = np.random.randn(5 , 5)
# argmin along axis 0 gives, for every column, the row index of its minimum.
# Without axis it would return a single index into the flattened array.
min_rows = np.argmin(a , axis = 0)
# Pairing those row indices with the column indices 0..4 addresses one cell per column.
a[min_rows , np.arange(5)] = 1
a
'''
array([[ 1.03741769, 1. , 0.04588491, -1.56846048, -0.69702655],
[ 0.07827971, 0.66178652, 1. , -0.15189357, -0.93276632],
[ 0.48678279, -1.4055491 , 1.5417915 , -0.18737426, 0.40157406],
[ 1. , 0.49162913, -1.27824055, -0.04964457, 1. ],
[-0.91942993, -0.527093 , -2.30232503, 1. , -0.74834252]])
'''

求[0,1]x[0,1]区域内所有坐标

np.meshgrid: 生成网格坐标

np.linspace: 生成等差数列

1
2
3
4
5
6
7
8
9
10
11
12
# Structured dtype with two float fields. The builtin float is used because
# the np.float alias no longer exists in current NumPy releases.
a = np.ones((5 , 5) , dtype = [('x' , float) , ('y' , float)])

# meshgrid returns the x-coordinate grid and the y-coordinate grid; store one per field.
a['x'] , a['y'] = np.meshgrid(np.linspace(0 , 1 , 5) , np.linspace(0 , 1 , 5))
a # all coordinates

'''
array([[(0. , 0. ), (0.25, 0. ), (0.5 , 0. ), (0.75, 0. ),(1. , 0. )],
[(0. , 0.25), (0.25, 0.25), (0.5 , 0.25), (0.75, 0.25),(1. , 0.25)],
[(0. , 0.5 ), (0.25, 0.5 ), (0.5 , 0.5 ), (0.75, 0.5 ),(1. , 0.5 )],
[(0. , 0.75), (0.25, 0.75), (0.5 , 0.75), (0.75, 0.75),(1. , 0.75)],
[(0. , 1. ), (0.25, 1. ), (0.5 , 1. ), (0.75, 1. ),(1. , 1. )]], dtype=[('x', '<f8'), ('y', '<f8')])
'''

np.meshgrid

1
2
3
4
5
6
7
8
9
10
11
12
13
# 对于2维矩阵来说,如果x坐标分成n份,y坐标分成m份,那么x的坐标矩阵就是mxn,y的坐标矩阵也是mxn
np.meshgrid(np.linspace(0 , 1 , 5) , np.linspace(0 , 1 , 4))

'''
[array([[0. , 0.25, 0.5 , 0.75, 1. ],
[0. , 0.25, 0.5 , 0.75, 1. ],
[0. , 0.25, 0.5 , 0.75, 1. ],
[0. , 0.25, 0.5 , 0.75, 1. ]]),
array([[0. , 0. , 0. , 0. , 0. ],
[0.33333333, 0.33333333, 0.33333333, 0.33333333, 0.33333333],
[0.66666667, 0.66666667, 0.66666667, 0.66666667, 0.66666667],
[1. , 1. , 1. , 1. , 1. ]])]
'''

np.subtract.outer

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
np.subtract.outer(x , y):
x: (n,)
y: (m,)
res: (n , m),其中res[i][j] = x[i] - y[j]

x = np.array([5 , 6 , 7 , 9])
y = np.array([2 , 3 , 4])
np.subtract.outer(x , y)

'''
array([[3, 2, 1],
[4, 3, 2],
[5, 4, 3],
[7, 6, 5]])
'''

np.add.outer 也是类似

查看numpy的各种类型的最大值和最小值

1
2
3
4
5
# Integer types: np.iinfo reports the largest and smallest representable values.
for dtype in (np.int16 , np.int32 , np.int64):
    print(np.iinfo(dtype).max , np.iinfo(dtype).min)
print()
# Floating-point types: np.finfo reports the corresponding max and min.
for dtype in (np.float16 , np.float32 , np.float64):
    print(np.finfo(dtype).max , np.finfo(dtype).min)

np.atleast_2d

输入:arys1, arys2, … : array_like

函数作用:View inputs as arrays with at least two dimensions.

1
2
3
4
5
a = np.arange(12).reshape(6 , 2)
b , c = np.atleast_2d(a[: , 0] , a[: , 1])
b.shape , a[: , 0].shape
'''
((1, 6), (6,))
'''
1
2
3
4
np.atleast_2d(1 , 2 , [1 , 2])  # 都转为2d 的shape
'''
[array([[1]]), array([[2]]), array([[1, 2]])]
'''

scipy.spatial.distance.cdist

更快地计算两点之间的距离

1
2
3
4
5
6
7
8
9
Parameters
----------
XA:ndarray , 第一个点的坐标集合,shape:(n , m),n是点的个数,m是点的坐标数量(比如是2d的就是x和y,如果是3d的就是x、y和z)。
XB:ndarray , 第二个点的坐标集合
metric:str or callable, optional,可选择计算什么距离
----------
return: ndarray,shape = (XA.shape[0] , XB.shape[0]),其中res[i][j] = XA[i]和XB[j]的距离

可参考:https://blog.csdn.net/kancy110/article/details/75675574
1
2
3
import scipy.spatial
# Four points with 3-D coordinates. np.arange(4 , 3) would produce an empty
# array, because it means "start at 4, stop before 3".
a = np.arange(12).reshape(4 , 3)
scipy.spatial.distance.cdist(a , a , metric='euclidean') # pairwise Euclidean distances (the default metric)

原地将float32 转为 int32

使用np.ndarray.view

numpy.ndarray.view中,提供对内存区域不同的切割方式,来完成数据类型的转换,而无须要对数据进行额外的copy,来节约内存空间。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
a = (np.random.rand(10) * 100).astype(np.float32)
b = a.view(np.int32)
print(id(a) , id(b)) # 虽然a 和 b 的地址不同,但是共享内存 ,修改b ,a 也会改变
'''
121892256 122304432
'''
print(np.shares_memory(a , b)) # 是否共享内存
'''
True
'''
b[:] = a # 这样保证了 赋值后 b 的 dtype 不变 如果 用 b = a 的话,b 会完全成为 a
print(b , type(b))
'''
[38 62 27 52 66 84 90 25 9 13] <class 'numpy.ndarray'>
'''
print(a) # 由于 修改了 b,所以 a 发生了改变
'''
[5.32e-44 8.69e-44 3.78e-44 7.29e-44 9.25e-44 1.18e-43 1.26e-43 3.50e-44 1.26e-44 1.82e-44]
'''

np.ndenumerate

numpy array 的 enumerate

1
2
3
4
5
6
7
8
9
10
11
12
a = np.arange(6).reshape(2 , 3)
for i , v in np.ndenumerate(a): # yields (index tuple, value) pairs
    print(i , v)

'''
(0, 0) 0
(0, 1) 1
(0, 2) 2
(1, 0) 3
(1, 1) 4
(1, 2) 5
'''
1
2
3
4
5
6
7
8
9
10
11
12
a = np.arange(6).reshape(6)
for i , v in np.ndenumerate(a): # for a 1-D array the index is still a tuple, of length one
    print(i , v)

'''
(0,) 0
(1,) 1
(2,) 2
(3,) 3
(4,) 4
(5,) 5
'''

np.ndindex

用于获取ndarray 的 index

1
2
3
4
5
6
7
8
9
10
11
12
a = np.arange(6).reshape(2 , 3)
for i in np.ndindex(a.shape): # yields every index tuple of an array with this shape
    print(i)

'''
(0, 0)
(0, 1)
(0, 2)
(1, 0)
(1, 1)
(1, 2)
'''

生成一个2D Gaussian-like array

这里不是生成一个array,其符合高斯分布,而是其坐标中$y$是$x$的高斯函数

高斯函数:$f(x)=a e^{-\frac{(x-b)^{2}}{2 c^{2}}}$

  • a表示得到曲线的高度;
  • b(μ)是指曲线在x轴的中心;
  • c(σ)指width(与半峰全宽有关);
1
2
3
4
5
6
x , y = np.meshgrid(np.linspace(-1 , 1 , 10) , np.linspace(-1 , 1 , 10)) # grid coordinates
mu = 0
sigma = 1
a = 1
r = np.sqrt(x ** 2 + y ** 2) # distance of every grid point from the origin
# Gaussian of the distance: a * exp(-(r - mu)^2 / (2 * sigma^2)).
# The deviation (r - mu) has to be squared.
G = a * np.exp(-(r - mu) ** 2 / (2 * sigma ** 2))

高斯函数和正态分布

高斯函数只是一种函数;

而正态分布描述的是随机变量$x$的分布:如果$x$的概率密度函数是如下形式的高斯函数:

$f(x)=\frac{1}{\sigma \sqrt{2 \pi}} e^{-\frac{(x-\mu)^{2}}{2 \sigma^{2}}}$

那么$x$就符合正态分布

np.put

1
2
3
4
5
6
7
8
9
10
Parameters
----------
a : ndarray
Target array. 要放入的ndarray
ind : array_like
Target indices, interpreted as integers. 要放入的位置
v : array_like
要放入的数字,如果len(v) < len(ind),那么就要重复v来填充
mode : {'raise', 'wrap', 'clip'}, optional
Specifies how out-of-bounds indices will behave. 越界后的处理,默认是'raise',即报错
1
2
3
4
5
a = np.arange(12)
np.put(a, [1 , 2 , 3 , 4 , 5], [100 , 1000])
'''
array([   0,  100, 1000,  100, 1000,  100,    6,    7,    8,    9,   10,   11])
'''

np.random.choice

在指定范围内随机选择几个数

1
2
3
4
5
np.random.choice(range(10*10) , 3 , replace = False)  # replace: 是否可以重复采样

'''
array([45, 94, 47])
'''

keepdims 参数

就是保留原来ndarray的一些维度

1
2
3
4
5
6
7
8
9
# 比如:
a = np.random.randn(5 , 4)
a.mean(axis = 1).shape , a.mean(axis = 1 , keepdims = True).shape
'''
((5,), (5, 1))
'''
# 如果没有keepdims,那么输出的维度就有问题
# 比如:
a -= a.mean(axis = 1 , keepdims = True) # 如果不加keepdims,那么广播的时候会出问题

数组按第n列排序

1
2
a = np.random.randn(5 , 5)
a[a[: , 1].argsort()] # 按第1列排序,得到行索引

np.flat

当多维数组用一维坐标访问时用

1
2
3
4
5
61. Find the nearest value from a given value in an array (★★☆)

a = np.random.randn(3 , 3)
b = 0
a.flat[np.abs(a - b).argmin()] # argmin 得到的是 一维的坐标,访问一维坐标用 np.ndarray.flat

np.bincount

详细,参考:https://blog.csdn.net/xlinsist/article/details/51346523

1
2
3
4
5
6
7
8
9
a = np.random.randint(0 , 10 , 10)
a
'''
array([0, 1, 1, 3, 1, 0, 7, 4, 0, 4])
'''
np.bincount(a)
'''
array([3, 3, 0, 1, 2, 0, 0, 1], dtype=int64)
'''
1
2
3
4
5
6
7
8
9
65. How to accumulate elements of a vector (X) to an array (F) based on an index list (I)? (★★★)

x = np.array([1 , 2 , 3 , 10 , 5])
i = np.array([1 , 3 , 2 , 1 , 4])
F = np.bincount(i , x) # i 是 x , x 是 weights
F
'''
array([ 0., 11., 3., 2., 5.])
'''
1
2
3
4
5
6
7
8
9
10
11
12
13
64. Consider a given vector, how to add 1 to each element indexed by a second vector (be careful with repeated indices)? (★★★)

a = np.ones(10)
i = np.random.randint(0 , len(a) , 20)
print(i)
'''
[7 0 4 6 2 8 5 6 4 2 5 2 0 0 7 4 1 7 6 8]
'''
a += np.bincount(i , minlength = len(a))
a
'''
array([4., 2., 4., 1., 4., 3., 4., 4., 3., 1.])
'''
1
2
3
4
5
68. Considering a one-dimensional vector D, how to compute means of subsets of D using a vector S of same size describing subset indices? (★★★)

D = np.array([1. , 2. , 3. , 4. , 5. , 6.]) # sample data
S = np.array([0 , 0 , 1 , 1 , 1 , 2])       # subset index of each element of D
sum_v = np.bincount(S , weights = D) # sum of D within each subset
sum_k = np.bincount(S)               # number of elements in each subset
sum_v / sum_k                        # mean of each subset

np.unique

返回一个array的唯一的值序列,结果会排序

1
2
3
4
5
66. Considering a (w,h,3) image of (dtype=ubyte), compute the number of unique colors (★★★)

w , h = (16 , 16)
img = (np.random.uniform(0. , 255. , (w , h , 3))).astype(np.ubyte)
# A color is one whole (r, g, b) triple, so flatten the image into a list of
# pixels and deduplicate complete rows. np.unique(img) on its own would return
# the distinct channel values instead.
colors = np.unique(img.reshape(-1 , 3) , axis = 0)
print(len(colors))

在切片时加 None,补充一维

1
2
3
4
5
a[: , : , None].shape , a[: , None , :].shape , a[None , : , :].shape , a[1:2 , : , None].shape

'''
((3, 4, 1), (3, 1, 4), (1, 3, 4), (1, 4, 1))
'''

如何交换array的两行

1
2
3
4
5
6
7
8
9
10
72. How to swap two rows of an array? (★★★)

a = np.arange(12).reshape(3 , 4)
a[[0 , 1]] = a[[1 , 0]] # 2d 的时候
a
'''
array([[ 4, 5, 6, 7],
[ 0, 1, 2, 3],
[ 8, 9, 10, 11]])
'''

np.ndarray.repeat

1
2
3
4
5
6
7
8
9
10
11
12
13
a
'''
array([[ 4, 5, 6, 7],
[ 0, 1, 2, 3],
[ 8, 9, 10, 11]])
'''

a.repeat(2 , axis = 1) # 每列重复两次
'''
array([[ 4, 4, 5, 5, 6, 6, 7, 7],
[ 0, 0, 1, 1, 2, 2, 3, 3],
[ 8, 8, 9, 9, 10, 10, 11, 11]])
'''

np.roll

1
2
3
4
5
6
7
8
9
10
11
Parameters
----------
a : array_like
Input array.
shift : int or tuple of ints
The number of places by which elements are shifted. If a tuple,
then `axis` must be a tuple of the same size, and each of the
given axes is shifted by the corresponding number. If an int
while `axis` is a tuple of ints, then the same value is used for
all given axes. 负数是反方向移动
axis
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
a = np.arange(12).reshape(3 , 4)
a
'''
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
'''

np.roll(a , (1 , 2) , axis = (0 , 1)) # shift the rows down by 1 and the columns right by 2
'''
array([[10, 11, 8, 9],
[ 2, 3, 0, 1],
[ 6, 7, 4, 5]])
'''

np.roll(a , (1 , 2)) # no axis given: the array is flattened, both shifts are applied to it (1 + 2 = 3 positions in total), then the original shape is restored
'''
array([[ 9, 10, 11, 0],
[ 1, 2, 3, 4],
[ 5, 6, 7, 8]])
'''

np.roll(a , (-2 , 2)) # 相当于没移动
'''
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
'''
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
73. Consider a set of 10 triplets describing 10 triangles (with shared vertices), find the set of unique line segments composing all the triangles (★★★) 

题意:使用10个三元数的集合描述10个三角形,找出组成这些三角形边的集合

import numpy as np
a = np.random.randint(0 , 100 , (10 , 3))
b = np.roll(a.repeat(2 , axis = 1) , -1 , axis = 1) # (p, q, r) -> (p, q, q, r, r, p)
b = b.reshape(len(b) * 3 , 2)                        # one row per edge: (p, q), (q, r), (r, p)
b = np.sort(b , axis = 1)                            # a segment has no direction: store (q, p) and (p, q) identically
b = b.view(dtype = [('p0' , b.dtype) , ('p1' , b.dtype)]) # view every row as a single two-field record
c = np.unique(b)
c

'''
array([( 6, 57), ( 6, 84), (10, 22), (10, 30), (11, 21), (11, 40),
(11, 41), (11, 87), (21, 42), (21, 65), (21, 87), (22, 30),
(40, 41), (42, 65), (45, 58), (45, 93), (51, 86), (51, 88),
(52, 70), (52, 75), (53, 82), (53, 93), (57, 84), (58, 93),
(62, 66), (62, 97), (66, 97), (70, 75), (82, 93), (86, 88)],
dtype=[('p0', '<i4'), ('p1', '<i4')])
'''

np.repeat

1
2
3
4
5
6
7
8
9
10
Parameters
----------
a : array_like
Input array.
repeats : int or array of ints
The number of repetitions for each element. `repeats` is broadcasted
to fit the shape of the given axis.
axis : int, optional
The axis along which to repeat values. By default, use the
flattened input array, and return a flat output array.
1
2
3
4
5
6
7
8
np.repeat(np.arange(5) , [1 , 2 , 1 , 2 , 0])
'''
array([0, 1, 1, 2, 3, 3])
'''
np.repeat(np.arange(5) , 2)
'''
array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
'''
1
2
3
4
5
6
7
8
9
10
11
12
74. Given an array C that is a bincount, how to produce an array A such that np.bincount(A) == C? (★★★)

c = np.bincount([1,1,2,3,4,4,6])
print(c)
'''
[0 2 1 1 2 0 1]
'''
a = np.repeat(np.arange(len(c)) , repeats = c)
a
'''
array([1, 1, 2, 3, 4, 4, 6])
'''

np.cumsum

Return the cumulative sum of the elements along a given axis. 即前缀和

1
2
3
4
5
a = np.arange(5)
np.cumsum(a , dtype = 'float64')
'''
array([ 0., 1., 3., 6., 10.])
'''
1
2
3
4
5
6
7
8
9
10
11
12
13
75. How to compute averages using a sliding window over an array? (★★★)

def Sliding_Window(a , n):
    """Return the mean of every length-n window of the sequence a.

    Parameters
    ----------
    a : array_like
        Input values.
    n : int
        Window length; must be at least 1.

    Returns
    -------
    numpy.ndarray
        float64 array holding one mean per window position.

    Raises
    ------
    ValueError
        If n is smaller than 1.
    """
    if n < 1:
        raise ValueError("window length n must be at least 1")
    ret = np.cumsum(a , dtype = 'float64') # prefix sums
    ret[n:] = ret[n:] - ret[:-n] # ret[i] - ret[i - n] is the sum of the window ending at i
    ret[n-1:] /= n
    return ret[n-1:]

a = np.arange(20)
print(Sliding_Window(a , 3))
'''
[ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15. 16. 17. 18.]
'''

stride_tricks.as_strided

有关stride_tricks.as_strided 的用法,参考:https://zhuanlan.zhihu.com/p/64933417

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
76. Consider a one-dimensional array Z, build a two-dimensional array whose first row is (Z[0],Z[1],Z[2]) and each subsequent row is shifted by 1 (last row should be (Z[-3],Z[-2],Z[-1]) (★★★)

from numpy.lib import stride_tricks

def rolling(a , n):
    """Return the length-n sliding windows of the 1-D array a as rows of a 2-D array.

    Parameters
    ----------
    a : numpy.ndarray
        One-dimensional input array.
    n : int
        Window length.

    Returns
    -------
    numpy.ndarray
        Array of shape (a.size - n + 1, n) built with as_strided, so it is a
        view onto a's memory rather than a copy.
    """
    shape = (a.size - n + 1 , n)
    # Use the array's real byte step between neighbours; it equals a.itemsize
    # only when a is contiguous (a[::2], for example, has a larger step).
    step = a.strides[0]
    return stride_tricks.as_strided(a , shape , (step , step))

rolling(np.arange(10) , 3)
'''
array([[0, 1, 2],
[1, 2, 3],
[2, 3, 4],
[3, 4, 5],
[4, 5, 6],
[5, 6, 7],
[6, 7, 8],
[7, 8, 9]])
'''

negate a boolean,将布尔数组取反

使用 np.logical_not

1
2
3
Z = np.random.randint(0,2,100)
print(Z)
np.logical_not(Z, out=Z) # out = Z 保证了 改变原来的数组

change the sign of a float inplace

使用np.negative

1
2
3
4
Z = np.random.uniform(-1.0,1.0,10)
print(Z)
np.negative(Z, out=Z)
Z

计算点到直线的距离

可以利用向量积来求,由于向量积是两个向量所围成的平行四边形的面积,所以可以利用面积/底边来求高,这个高即是点到直线的距离

np.cross 是求向量的向量积,有关向量积的内容参考:https://www.bilibili.com/video/av6341515

1
2
3
4
5
6
7
78. Consider 2 sets of points P0,P1 describing lines (2d) and a point p, how to compute distance from p to each line i (P0[i],P1[i])? (★★★)

p0 = np.random.uniform(-10 , 10 , (10 , 2)) # 直线的一端点
p1 = np.random.uniform(-10 , 10 , (10 , 2)) # 直线的另一端点
p = np.random.uniform(-10 , 10 , (1 , 2)) # 点

np.abs(np.cross(p0 - p , p1 - p, axis = 1)) / np.linalg.norm(p0 - p1 , axis = 1) # 距离 = 面积 / 底边

计算矩阵的秩

先对矩阵进行奇异值分解(SVD),然后利用 非0奇异值的个数 = 矩阵的秩这一性质来求

有关奇异值分解(SVD),参考: https://www.cnblogs.com/endlesscoding/p/10033527.html 和 https://blog.csdn.net/weixin_43991178/article/details/104906655

1
2
3
4
5
6
Z = np.random.uniform(0,1,(10,8))
U, S, V = np.linalg.svd(Z) # Singular Value Decomposition
print(S)
rank = np.sum(S > 1e-10) # 非0 奇异值的个数 = 秩
print(rank)
print(U.shape , V.shape)

np.tensordot

np.tensordot,参考:

https://blog.csdn.net/weixin_28710515/article/details/90230842

1
2
3
4
5
6
7
8
9
10
86. Consider a set of p matrices with shape (n,n) and a set of p vectors with shape (n,1). How to compute the sum of the p matrix products at once? (result has shape (n,1)) (★★★)

题意:p个(n,n)矩阵和p个(n,1)向量相乘,得到的p个(n,1)向量相加

p , n = 10 , 20
v1 = np.random.randint(1 , 10 , (p , n , n))
v2 = np.random.randint(1 , 10 , (p , n , 1))
# v1[: , i , :]对应的是v1 矩阵的行集合
# v2[: , : , i]对应的是v2 的列集合
np.tensordot(v1 , v2 , axes = [[0 , 2] , [0 , 1]]) # 对应的v1 的行和 v2 的列向乘

np.argpartition

np.argpartition: 第一个参数 a 如果是a的话,是升序排列,如果是-a的话,就是降序排列 第二个参数kth,表示要选择的前k个元素,那么就把这前k个元素的索引排到前面,并不会排序,而只会筛选出这前几位元素,返回值是数组的索引。

1
2
3
4
5
6
7
8
9
10
11
12
a = np.random.randint(0 , 10 , (2 , 20))
a
'''
array([[7, 9, 1, 9, 1, 8, 6, 5, 5, 1, 8, 5, 3, 2, 2, 8, 2, 1, 7, 5],
[6, 6, 7, 0, 7, 8, 0, 2, 2, 9, 1, 3, 1, 5, 2, 1, 0, 3, 0, 0]])
'''
b = np.argpartition(-a , kth = 5 , axis = 1)
print(b)
'''
[[ 3 1 5 15 10 0 18 11 6 19 8 7 12 13 14 4 16 17 2 9]
[ 5 9 2 1 4 0 6 7 8 3 10 11 12 13 14 15 16 17 18 19]]
'''
1
2
3
4
5
6
7
8
9
89. How to get the n largest values of an array (★★★)

# 先将数组降序排列,然后选出前n 个元素
a = np.arange(1000)
np.random.shuffle(a)
a[np.argpartition(-a , kth = 10 , axis = 0)][:10]
'''
array([997, 998, 995, 996, 999, 994, 992, 993, 991, 990])
'''

记录数组和结构数组

记录数组和结构数组的定义和区别,参考:https://blog.csdn.net/qq_27825451/article/details/102457045

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
91. How to create a record array from a regular array? (★★★)
# 将普通数组转为记录数组
Z = np.array([("Hello", 2.5, 3),
              ("World", 3.6, 2)])

# Each row of Z.T is one column of Z and becomes one field of the record array.
# np.rec is the public home of fromarrays; the np.core path is deprecated.
R = np.rec.fromarrays(Z.T,
                      names='col1, col2, col3',
                      formats = 'S8, f8, i8')
print(R)
print(Z)
print(Z.T)
print(R.col1) # record arrays expose their fields as attributes

'''
[(b'Hello', 2.5, 3) (b'World', 3.6, 2)]

[['Hello' '2.5' '3']
['World' '3.6' '2']]

[['Hello' 'World']
['2.5' '3.6']
['3' '2']]

[b'Hello' b'World']
'''

np.unpackbits

将uint8(8位无符号整型)数组的每个元素展开成二进制位

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
95. Convert a vector of ints into a matrix binary representation (★★★)

I = np.array([0, 1, 2, 4, 8, 16, 32, 64, 128], dtype=np.uint8)

print(np.unpackbits(I[:, np.newaxis], axis=1)) # 将一个8位长整形元素展开成二进制形式

'''
[[0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 1]
[0 0 0 0 0 0 1 0]
[0 0 0 0 0 1 0 0]
[0 0 0 0 1 0 0 0]
[0 0 0 1 0 0 0 0]
[0 0 1 0 0 0 0 0]
[0 1 0 0 0 0 0 0]
[1 0 0 0 0 0 0 0]]
'''

如何判断一个1d的array的所有元素都相等

1
2
3
4
5
a = np.array([1] * 5)
np.all(a[1:] == a[:-1]) # a[1:] == a[:-1] compares each element with its successor (1st with 2nd, 2nd with 3rd, ..., (n-1)-th with n-th); if every comparison is True all elements are equal, and a single False means they are not.
'''
True
'''
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
94. Considering a 10x3 matrix, extract rows with unequal values (e.g. [2,2,3]) (★★★)

题意:筛选出不是所有元素都相等的行

a = np.random.randint(0 , 4 , (10 , 3))
a
'''
array([[2, 0, 0],
[0, 2, 3],
[0, 3, 2],
[2, 3, 3],
[2, 0, 3],
[3, 1, 0],
[1, 2, 3],
[0, 3, 0],
[2, 3, 0],
[1, 1, 1]])
'''
a[~np.all(a[: , 1:] == a[: , :-1] , axis = 1)]
'''
array([[2, 0, 0],
[0, 2, 3],
[0, 3, 2],
[2, 3, 3],
[2, 0, 3],
[3, 1, 0],
[1, 2, 3],
[0, 3, 0],
[2, 3, 0]])
'''

np.indices

np.indices函数的作用是返回一个代表网格中所有序号的矩阵(给定shape)。

详细,参考:https://blog.csdn.net/isunLt/article/details/107620828

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
90. Given an arbitrary number of vectors, build the cartesian product (every combinations of every item) (★★★)

题意:求笛卡尔积

求笛卡尔积,比如有三个array,分别是:[1 , 2 , 3] , [5 , 6] , [7 , 8] , 那么我们可以用下标来表示每一个笛卡尔积,那么就是从(0 , 0 , 0)一直到(2 , 1 , 1),这就类似于网格坐标,所以我们可以先求网格坐标。

def cartesian(arrays):
    """Return the cartesian product of any number of 1-D sequences.

    Parameters
    ----------
    arrays : sequence of array_like
        The input vectors, e.g. [[1, 2, 3], [4, 5], [6, 7]].

    Returns
    -------
    numpy.ndarray
        2-D array with one row per combination and one column per input vector.
        NOTE: the values are written into the index grid produced by
        np.indices, so the inputs are expected to be integers.
    """
    arrays = [np.asarray(x) for x in arrays]
    shape = tuple(x.shape[0] for x in arrays) # (3 , 2 , 2) for the example above
    ix = np.indices(shape)
    # One row per input vector, then transpose to one row per combination.
    # The row count follows the number of inputs instead of being fixed at 3.
    ix = ix.reshape(len(arrays) , -1).T
    for i , arr in enumerate(arrays):
        ix[: , i] = arr[ix[: , i]] # replace the grid indices of column i with the actual values
    return ix
print(cartesian([[1 , 2 , 3] , [4 , 5] , [6 , 7]]))

如何实现等距采样

等距采样就是每隔相等的距离采一次样。

可以采用一维线性插值法来获取不同的距离所对应的样本的值,一维线性插值法的函数:np.interp,详细,参考:https://blog.csdn.net/hfutdog/article/details/87386901

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
98. Considering a path described by two vectors (X,Y), how to sample it using equidistant samples (★★★)?

题意:给定一条路径的(x , y),让你等距采样出(x , y)点

import matplotlib.pyplot as plt
phi = np.arange(0,10*np.pi,0.1)
a =1
x = a*phi*np.cos(phi)
y = a*phi*np.sin(phi)
print(x.shape)
dr = (np.diff(x)**2 + np.diff(y)**2)**.5 # np.diff 计算 后一个元素减去前一个元素,这一步计算的是相邻两个点的距离
r = np.zeros_like(x)
r[1:] = np.cumsum(dr)
print(r)
r_int = np.linspace(0, r.max(), 80) # 创建等差数列
x_int = np.interp(r_int, r, x) #插值 一维线性插值,
y_int = np.interp(r_int, r, y) #插值
plt.subplot(133)
plt.plot(x , y , x_int,y_int)

np.diff

后一个元素减去前一个元素

1
2
3
4
np.diff(np.array([1 , 2 , 3 , 4]))
'''
array([1, 1, 1])
'''

np.inner

返回两个向量的内积

1
2
3
4
5
np.inner(np.array([1 , 2 , 3]) , np.array([1 , 2 , 3]))

'''
14
'''

ndarray 的 slice 切片操作

1
2
3
4
5
6
7
a = np.arange(25).reshape(5 , 5)
# A multi-dimensional index has to be a tuple of slices;
# current NumPy rejects a list of slices.
sub = a[(slice(0 , 3) , slice(0 , 3))] # same as a[0:3 , 0:3]
sub
'''
array([[ 0, 1, 2],
[ 5, 6, 7],
[10, 11, 12]])
'''

x.__setitem__(i, y) <==> x[i]=y

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
85. Create a 2D array subclass such that Z[i,j] == Z[j,i] (★★★)

题意:让我们创造出来的array不仅一开始的属性是对称矩阵,而且修改后也能是对称矩阵。

class Symetric(np.ndarray):
    """2-D ndarray subclass whose item assignment keeps Z[i, j] == Z[j, i]."""

    def __setitem__(self, index, value):
        """Assign value to position (i, j) and to its mirror position (j, i).

        index is unpacked into exactly two components, so only two-component
        indices such as Z[i, j] are supported.
        """
        # x[i] = y is x.__setitem__(i, y); the inherited method writes a single
        # position, so it is called twice to keep the matrix symmetric.
        i, j = index
        super().__setitem__((i, j), value)
        super().__setitem__((j, i), value)


def symetric(Z):
    """Return a Symetric array built from the square array Z.

    Z + Z.T - diag(Z) makes the starting values symmetric while counting the
    diagonal only once. The Symetric view then keeps later assignments
    symmetric, which a plain ndarray would not.
    """
    return np.asarray(Z + Z.T - np.diag(Z.diagonal())).view(Symetric)

a = np.random.randint(0,10,(5,5))
print(a)

'''
[[2 7 2 0 6]
[4 9 9 2 6]
[0 3 1 1 6]
[6 1 8 0 9]
[8 4 2 2 7]]
'''

S = symetric(a)
S[2,3] = 42
print(S)

'''
[[ 2 11 2 6 14]
[11 9 12 3 10]
[ 2 12 1 42 8]
[ 6 3 42 0 11]
[14 10 8 11 7]]
'''

ndarray 子类实体的创建

参考:https://blog.csdn.net/SAKURASANN/article/details/102750468

63. Create an array class that has a name attribute (★★☆)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# python中cls代表的是类的本身,相对应的self则是类的一个实例对象。
class NamedArray(np.ndarray):
    """ndarray subclass that carries a name attribute."""

    def __new__(cls, array, name="no name"):
        """Create an instance from array and attach name to it."""
        # View-casting turns the input into an instance of cls.
        obj = np.asarray(array).view(cls)
        obj.name = name
        return obj

    def __array_finalize__(self, obj):
        """Copy the name from the array this instance was derived from.

        NumPy calls this hook for instances created by view casting or by
        slicing, which never go through __new__. Without it such arrays would
        have no name attribute at all.
        """
        if obj is None:
            return
        self.name = getattr(obj, 'name', "no name")

Z = NamedArray(np.arange(10), "lzclzc")
print (Z.name , type(Z) , Z)

__new__

参考: