查看numpy的版本和配置

import numpy as np
np.__version__     # 查看版本

np.show_config()  # 查看配置

np.zeros

默认$dtype$是浮点型的0

np.zeros(10)        # 一维
np.zeros((3 , 3))   # 多维

np.ndarray.itemsize

查看$ndarray$中一个元素的内存

1
2
3

a = np.random.randn(3 , 4)
# 输出：'float64' , 8 , 8 * 12 = 96(ndarray所占的内存)
print(a.dtype , a.itemsize , a.itemsize * a.size)

reverse一个vector

np.arange(10)[::-1] # 利用步长为-1的切片来反转

寻找ndarray的非0元素的下标

使用np.nonzero()

a = np.arange(12).reshape(3 , 4)
a[1][1] = 0
''' 
array([[ 0,  1,  2,  3],
       [ 4,  0,  6,  7],
       [ 8,  9, 10, 11]])
'''
np.nonzero(a)  # 由于是2维的，所以len = 2，返回值是tuple
'''
(array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2], dtype=int64),
 array([1, 2, 3, 0, 2, 3, 0, 1, 2, 3], dtype=int64))
'''

创建一个单位矩阵

np.eye

np.eye(3 , 3)   # dtype 为浮点型
'''
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])
'''

np.random.random()

Return random floats in the half-open interval $[0.0, 1.0)$.

np.random.random((3 , 3))

寻找ndarray的最大值和最小值

a = np.arange(12).reshape(3 , 4)
a.max() # 寻找最大值，11
a.max(axis = 0)   # 输出：array([ 8,  9, 10, 11])
a.min()

使一个array的周围都是0

a = np.ones((5 , 5))
a[: , [0 , -1]] = 0
a[[0 , -1] , :] = 0
a
'''
array([[0., 0., 0., 0., 0.],
       [0., 1., 1., 1., 0.],
       [0., 1., 1., 1., 0.],
       [0., 1., 1., 1., 0.],
       [0., 0., 0., 0., 0.]])
'''

np.nan和np.inf的一些运算

0 * np.nan # nan
np.nan == np.nan # False
np.inf > np.nan , np.inf < np.nan , np.inf == np.nan # (False , False , False)
np.nan - np.nan  # nan
np.inf - np.inf  # nan
np.nan in set([1 , 2 , np.nan])  # True
type(np.nan)  # float
0.3 == 0.1 * 3  # False

np.diag(v , k)

v: array_like

按照k和v来判断矩阵的大小(是方阵)

np.diag(1 + np.arange(4) , k = 1)  # 对角线上一格  5 * 5
'''
array([[0, 1, 0, 0, 0],
       [0, 0, 2, 0, 0],
       [0, 0, 0, 3, 0],
       [0, 0, 0, 0, 4],
       [0, 0, 0, 0, 0]])
'''
np.diag(1 + np.arange(4) , k = -1)   # 对角线下一格  5 * 5
'''
array([[0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 2, 0, 0, 0],
       [0, 0, 3, 0, 0],
       [0, 0, 0, 4, 0]])
'''
np.diag(1 + np.arange(4) , k = 0)   # 在对角线上  4 * 4
'''
array([[1, 0, 0, 0],
       [0, 2, 0, 0],
       [0, 0, 3, 0],
       [0, 0, 0, 4]])
'''

创建一个checkerboard pattern的矩阵

方法1：

a = np.ones((8 , 8) , dtype = 'int')
a[::2 , ::2] = 0
a[1::2 , 1::2] = 0
a
'''
array([[0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0]])
'''

方法2：

np.tile(np.array([[0 , 1] , [1 , 0]]) , (4 , 4))
'''
[[0 , 1]
[1 , 0]]  的块在axis = 0 和 axis = 1 上都重复4次
'''

'''
array([[0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0]])
'''

给定一个array的维度，如何查看第i个元素的下标是多少

np.unravel_index

np.unravel_index(99 , (6 , 7 , 8))  # shape = (6 , 7 , 8)的数组中，第99个元素（从0开始计数）的下标
# (1, 5, 3)

创建一个dtype来描述(RGBA)

dtype1 = np.dtype([('r' , np.ubyte) , ('g' , np.ubyte) , ('b' , np.ubyte) , ('a' , np.ubyte)]) # 创建好类型

a = np.array([(1 , 2 , 3 , 4) , (5 , 6 , 7 , 8)] , dtype = dtype1)
a
'''
array([(1, 2, 3, 4), (5, 6, 7, 8)],
      dtype=[('r', 'u1'), ('g', 'u1'), ('b', 'u1'), ('a', 'u1')])
'''

将array中大于3且小于8的数取反

用与运算

a = np.arange(11)
# Combine the two boolean masks with the element-wise `&` (not `and`), then
# flip the sign of the selected elements in place; only 4..7 are negated.
a[(a > 3) & (a < 8)] *= -1

求两个array的交集

np.intersect1d：求交集

a = np.array([1 , 2 , 4 , 3 , 5])
b = np.array([4 , 3 , 9 , 8])
np.intersect1d(a , b)

'''
array([3, 4])
'''

获取今天和明天的日期

import pandas as pd  # required for pd.Timestamp below

a = np.datetime64('today')  # today's date at day resolution
a
'''
numpy.datetime64('2020-10-02')
'''
b = a + np.timedelta64(1 , 'D')  # adding a one-day timedelta gives tomorrow
b
'''
numpy.datetime64('2020-10-03')
'''

a = pd.Timestamp(a)  # convert to a pandas Timestamp to read calendar fields
a
'''
Timestamp('2020-10-02 00:00:00')
'''
a.year    # year component
'''
2020
'''

获取2016年7月的全部日期（以天为单位）

修改dtype使得字符串变为时间

np.arange('2016-07' , '2016-08' , dtype = 'datetime64[D]') # 换成Y就是年为单位，换成M就是月为单位
'''
array(['2016-07-01', '2016-07-02', '2016-07-03', '2016-07-04',
       '2016-07-05', '2016-07-06', '2016-07-07', '2016-07-08',
       '2016-07-09', '2016-07-10', '2016-07-11', '2016-07-12',
       '2016-07-13', '2016-07-14', '2016-07-15', '2016-07-16',
       '2016-07-17', '2016-07-18', '2016-07-19', '2016-07-20',
       '2016-07-21', '2016-07-22', '2016-07-23', '2016-07-24',
       '2016-07-25', '2016-07-26', '2016-07-27', '2016-07-28',
       '2016-07-29', '2016-07-30', '2016-07-31'], dtype='datetime64[D]')
'''

提取正数的整数部分 (4 种方法)

有关np.where的用法，参考：https://www.cnblogs.com/massquantity/p/8908859.html

a = np.random.uniform(-5 , 10 , 10)
a
'''
array([-1.16420344,  3.95631045,  7.34675534,  8.03188626,  9.04815034,
        6.5529346 , -1.46680238,  7.35797919,  3.9921186 , -3.76588798])
'''

# 方法1：
np.where(a > 0 , a - a%1 , a)

# 方法2：
np.where(a > 0 , a // 1 , a)

# 方法3：
np.where(a > 0 , np.floor(a) , a)

# 方法4：
np.where(a > 0 , a.astype(int) , a)

'''
array([-1.16420344,  3.        ,  7.        ,  8.        ,  9.        ,
        6.        , -1.46680238,  7.        ,  3.        , -3.76588798])
'''

np.trunc

np.trunc: 此函数返回输入数组元素的截断值。输入值$x$的截断值$t$是介于0和$x$之间、最接近$x$的整数，即向0方向取整（直接舍去小数部分）。

a = np.array([-4.5375666 ,  1.54911477, -3.74455161, -2.41802724,  3.17252821, 8.00279135, -4.8008351 ,  5.53268438, -2.19521765,  8.72607842])
np.trunc(a)
'''
array([-4.,  1., -3., -2.,  3.,  8., -4.,  5., -2.,  8.])
'''

# 对比np.floor，是取更小的元素
np.floor(a)
'''
array([-5.,  1., -4., -3.,  3.,  8., -5.,  5., -3.,  8.])
'''

利用生成器方法来生成一个array

np.fromiter: Create a new 1-dimensional array from an iterable object.

def func(n):
    """Yield the integers 0, 1, ..., n - 1 one at a time."""
    for i in range(n):
        yield i      # `yield` turns this function into a generator


# np.fromiter consumes any iterable (here a generator) and builds a 1-D array;
# dtype is required because it cannot be inferred without consuming the data.
np.fromiter(func(10) , dtype = float)
'''
array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
'''

np.add.reduce(ndarray , axis)

比np.sum更快

a = np.array([1 , 2 , 3 , 4])
np.add.reduce(a)  # folds the array with `+`, i.e. sums all elements
'''
10
'''

a = np.arange(12).reshape(3 , 4)
np.add.reduce(a , axis = 0)
'''
array([12, 15, 18, 21])
'''

使一个array 只读(read-only)

a = np.array([1 , 2 , 3])
a.flags.writeable = False  # 写权限为 False

将直角坐标变为极坐标

a = np.random.randn(10 , 2)  # 直角坐标
x = a[: , 0]
y = a[: , 1]

R = np.sqrt(x ** 2 + y ** 2)  # 求长度
T = np.arctan2(y , x)   # 弧度制  求角度

np.arctan2(y , x) 求出来的范围是$[-\pi , \pi]$

而np.arctan(y / x) 的范围是$[-\pi/2 , \pi/2]$

将矩阵的每列的最小值置1 (特殊的切片方式)

a = np.random.randn(5 , 5)
# argmin needs axis = 0 to return one row index per column (without an axis it
# returns a single index into the flattened array). Pairing those row indices
# with the column indices addresses exactly one element per column.
a[np.argmin(a , axis = 0) , np.arange(a.shape[1])] = 1
a
'''
array([[ 1.03741769,  1.        ,  0.04588491, -1.56846048, -0.69702655],
       [ 0.07827971,  0.66178652,  1.        , -0.15189357, -0.93276632],
       [ 0.48678279, -1.4055491 ,  1.5417915 , -0.18737426,  0.40157406],
       [ 1.        ,  0.49162913, -1.27824055, -0.04964457,  1.        ],
       [-0.91942993, -0.527093  , -2.30232503,  1.        , -0.74834252]])
'''

求[0,1]x[0,1]区域内所有坐标

np.meshgrid: 生成网格坐标

np.linspace: 生成等差数列

# Structured array with two float fields; the builtin `float` is used because
# the `np.float` alias no longer exists in current NumPy releases.
a = np.ones((5 , 5) , dtype = [('x' , float) , ('y' , float)])

# meshgrid returns the x-coordinate grid and the y-coordinate grid; store each
# one in the matching field so that a[i , j] is the point (x , y).
a['x'] , a['y'] = np.meshgrid(np.linspace(0 , 1 , 5) , np.linspace(0 , 1 , 5))
a   # 所有坐标

'''
array([[(0.  , 0.  ), (0.25, 0.  ), (0.5 , 0.  ), (0.75, 0.  ),(1.  , 0.  )],
       [(0.  , 0.25), (0.25, 0.25), (0.5 , 0.25), (0.75, 0.25),(1.  , 0.25)],
       [(0.  , 0.5 ), (0.25, 0.5 ), (0.5 , 0.5 ), (0.75, 0.5 ),(1.  , 0.5 )],
       [(0.  , 0.75), (0.25, 0.75), (0.5 , 0.75), (0.75, 0.75),(1.  , 0.75)],
       [(0.  , 1.  ), (0.25, 1.  ), (0.5 , 1.  ), (0.75, 1.  ),(1.  , 1.  )]], dtype=[('x', '<f8'), ('y', '<f8')])
'''

np.meshgrid

# 对于2维矩阵来说，如果x坐标分成n份，y坐标分成m份，那么x的坐标矩阵就是mxn，y的坐标矩阵也是mxn
np.meshgrid(np.linspace(0 , 1 , 5) , np.linspace(0 , 1 , 4))

'''
[array([[0.  , 0.25, 0.5 , 0.75, 1.  ],
        [0.  , 0.25, 0.5 , 0.75, 1.  ],
        [0.  , 0.25, 0.5 , 0.75, 1.  ],
        [0.  , 0.25, 0.5 , 0.75, 1.  ]]),
 array([[0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.33333333, 0.33333333, 0.33333333, 0.33333333, 0.33333333],
        [0.66666667, 0.66666667, 0.66666667, 0.66666667, 0.66666667],
        [1.        , 1.        , 1.        , 1.        , 1.        ]])]
'''

np.subtract.outer

np.subtract.outer(x , y):
x: (1 , n)
y: (1 , m)
res: (n , m)，其中res[i][j] = x[i] - y[j]

x = np.array([5 , 6 , 7 , 9])
y = np.array([2 , 3 , 4])
np.subtract.outer(x , y)

'''
array([[3, 2, 1],
       [4, 3, 2],
       [5, 4, 3],
       [7, 6, 5]])
'''

np.add.outer 也是类似

查看numpy的各种类型的最大值和最小值

for dtype in (np.int16 , np.int32 , np.int64):
    print(np.iinfo(dtype).max , np.iinfo(dtype).min)
print()
for dtype in (np.float16 , np.float32 , np.float64):
    print(np.finfo(dtype).max , np.finfo(dtype).min)

np.atleast_2d

输入：arys1, arys2, … : array_like

函数作用：View inputs as arrays with at least two dimensions.

a = np.arange(12).reshape(6 , 2)
# Each 1-D column of shape (6,) is viewed as a 2-D array of shape (1 , 6).
b , c = np.atleast_2d(a[: , 0] , a[: , 1])
b.shape , a[: , 0].shape
'''
((1, 6), (6,))
'''

np.atleast_2d(1 , 2 , [1 , 2])  # scalars and lists are all promoted to 2-D
'''
[array([[1]]), array([[2]]), array([[1, 2]])]
'''

scipy.spatial.distance.cdist

更快地计算两点之间的距离

Parameters
----------
XA：ndarray , 第一个点的坐标集合，shape:(n , m)，n是点的个数，m是点的坐标数量（比如是2d的就是x和y，如果是3d的就是x、y和z）。
XB：ndarray , 第二个点的坐标集合
metric：str or callable, optional，可选择计算什么距离
----------
return: ndarray，shape = (XA.shape[0] , XB.shape[0])，其中res[i][j] = XA[i]和XB[j]的距离

可参考：https://blog.csdn.net/kancy110/article/details/75675574

1
2
3

import scipy.spatial
# Four points with three coordinates each; cdist expects 2-D inputs of shape
# (number_of_points , number_of_coordinates).
a = np.arange(12).reshape(4 , 3)
scipy.spatial.distance.cdist(a , a , metric='euclidean') # Euclidean distance (the default metric)

原地将float32 转为 int32

使用np.ndarray.view

numpy.ndarray.view中，提供对内存区域不同的切割方式，来完成数据类型的转换，而无须对数据进行额外的copy，来节约内存空间。

a = (np.random.rand(10) * 100).astype(np.float32)
b = a.view(np.int32)  
print(id(a) , id(b))  # 虽然a 和 b 的地址不同，但是共享内存 ，修改b ，a 也会改变
'''
121892256 122304432
'''
print(np.shares_memory(a , b))   # 是否共享内存
'''
True  
'''
b[:] = a  # 这样保证了 赋值后 b 的 dtype 不变 如果 用 b = a 的话，b 会完全成为 a 
print(b , type(b))
'''
[38 62 27 52 66 84 90 25  9 13] <class 'numpy.ndarray'>
'''
print(a)    # 由于 修改了 b，所以 a 发生了改变 
'''
[5.32e-44 8.69e-44 3.78e-44 7.29e-44 9.25e-44 1.18e-43 1.26e-43 3.50e-44 1.26e-44 1.82e-44]
'''

np.ndenumerate

numpy array 的 enumerate

a = np.arange(6).reshape(2 , 3)
for i , v in np.ndenumerate(a):  # 获取index 和相对应的值
    print(i , v)

'''
(0, 0) 0
(0, 1) 1
(0, 2) 2
(1, 0) 3
(1, 1) 4
(1, 2) 5
'''

a = np.arange(6).reshape(6)
for i , v in np.ndenumerate(a):
    print(i , v)

'''
(0,) 0
(1,) 1
(2,) 2
(3,) 3
(4,) 4
(5,) 5
'''

np.ndindex

用于获取ndarray 的 index

a = np.arange(6).reshape(2 , 3)
for i in np.ndindex(a.shape):
    print(i)

'''
(0, 0)
(0, 1)
(0, 2)
(1, 0)
(1, 1)
(1, 2)
'''

生成一个2D Gaussian-like array

这里不是生成一个array，其符合高斯分布，而是其坐标中$y$是$x$的高斯函数

高斯函数：$f(x)=a e^{-\frac{(x-b)^{2}}{2 c^{2}}}$

a表示得到曲线的高度；
b(μ)是指曲线在x轴的中心；
c(σ)指width(与半峰全宽有关)；

x , y = np.meshgrid(np.linspace(-1 , 1 , 10) , np.linspace(-1 , 1 , 10)) # grid coordinates
mu = 0      # centre of the bell curve
sigma = 1   # width parameter
a = 1       # peak height
r = np.sqrt(x ** 2 + y ** 2)  # distance of every grid point from the origin
# Gaussian: a * exp(-(r - mu)^2 / (2 * sigma^2)); the numerator must be squared.
G = a * np.exp(-(r - mu) ** 2 / (2 * sigma ** 2))

高斯函数和正态分布

高斯函数只是一种函数；

而正态分布是一个随机变量$x$，其概率密度符合高斯函数：

$f(x)=\frac{1}{\sqrt{2 \pi} \sigma} \exp \left(-\frac{(x-\mu)^{2}}{2 \sigma^{2}}\right)$

那么$x$就符合正态分布

np.put

Parameters
----------
a : ndarray
    Target array.  要放入的ndarray
ind : array_like
    Target indices, interpreted as integers.  要放入的位置
v : array_like
    要放入的数字，如果len(v) < len(ind)，那么就要重复v来填充
mode : {'raise', 'wrap', 'clip'}, optional
    Specifies how out-of-bounds indices will behave.  越界后的处理，默认是'raise'，即报错

a = np.arange(12)
# np.put writes into `a` in place and returns None; the two values are cycled
# to fill the five target positions 1..5.
np.put(a, [1 , 2 , 3 , 4 , 5], [100 , 1000])
a
'''
array([   0,  100, 1000,  100, 1000,  100,    6,    7,    8,    9,   10,   11])
'''

np.random.choice

在指定范围内随机选择几个数

np.random.choice(range(10*10) , 3 , replace = False)  # replace: 是否可以重复采样

'''
array([45, 94, 47])
'''

keepdims 参数

就是保留原来ndarray的一些维度

# 比如:
a = np.random.randn(5 , 4)
a.mean(axis = 1).shape , a.mean(axis = 1 , keepdims = True).shape
'''
((5,), (5, 1))
'''
# 如果没有keepdims，那么输出的维度就有问题
# 比如:
a -= a.mean(axis = 1 , keepdims = True) # 如果不加keepdims，那么广播的时候会出问题

数组按第n列排序

a = np.random.randn(5 , 5)
a[a[: , 1].argsort()]  # 按第1列排序：argsort 得到行索引，再用它重排各行

np.flat

当多维数组用一维坐标访问时用

61. Find the nearest value from a given value in an array (★★☆)

a = np.random.randn(3 , 3)
b = 0
a.flat[np.abs(a - b).argmin()]   # argmin 得到的是 一维的坐标，访问一维坐标用  np.ndarray.flat

np.bincount

详细，参考：https://blog.csdn.net/xlinsist/article/details/51346523

a = np.random.randint(0 , 10 , 10)
a
'''
array([0, 1, 1, 3, 1, 0, 7, 4, 0, 4])
'''
np.bincount(a)
'''
array([3, 3, 0, 1, 2, 0, 0, 1], dtype=int64)
'''

65. How to accumulate elements of a vector (X) to an array (F) based on an index list (I)? (★★★)

x = np.array([1 , 2 , 3 , 10 , 5])
i = np.array([1 , 3 , 2 , 1 , 4])
F = np.bincount(i , x) # i 是 x , x 是 weights
F
'''
array([ 0., 11.,  3.,  2.,  5.])
'''

64. Consider a given vector, how to add 1 to each element indexed by a second vector (be careful with repeated indices)? (★★★)

a = np.ones(10)
i = np.random.randint(0 , len(a) , 20)
print(i)
'''
[7 0 4 6 2 8 5 6 4 2 5 2 0 0 7 4 1 7 6 8]
'''
a += np.bincount(i , minlength = len(a))
a
'''
array([4., 2., 4., 1., 4., 3., 4., 4., 3., 1.])
'''

68. Considering a one-dimensional vector D, how to compute means of subsets of D using a vector S of same size describing subset indices? (★★★)

D = np.random.uniform(0 , 1 , 100)    # sample values
S = np.random.randint(0 , 10 , 100)   # subset index (0..9) of every value in D
sum_v = np.bincount(S , weights = D)  # per-subset sum of the values
sum_k = np.bincount(S)                # per-subset element count
sum_v / sum_k                         # per-subset mean (nan for an empty subset)

np.unique

返回一个array的唯一的值序列，结果会排序

66. Considering a (w,h,3) image of (dtype=ubyte), compute the number of unique colors (★★★)

w , h = (16 , 16)
img = (np.random.uniform(0. , 255. , (w , h , 3))).astype(np.ubyte)
# A colour is an (r , g , b) triple, so flatten the image to one pixel per row
# and deduplicate whole rows; np.unique(img) alone would only list the distinct
# channel values.
colors = np.unique(img.reshape(-1 , 3) , axis = 0)
print(len(colors))

在切片时加 None，补充一维

a = np.arange(12).reshape(3 , 4)  # any (3 , 4) array; the shapes below assume it
# Indexing with None inserts a new axis of length 1 at that position.
a[: , : , None].shape , a[: , None , :].shape , a[None , : , :].shape , a[1:2 , : , None].shape

'''
((3, 4, 1), (3, 1, 4), (1, 3, 4), (1, 4, 1))
'''

如何交换array的两行

72. How to swap two rows of an array? (★★★)

a = np.arange(12).reshape(3 , 4)
a[[0 , 1]] = a[[1 , 0]]   # 2d 的时候
a
'''
array([[ 4,  5,  6,  7],
       [ 0,  1,  2,  3],
       [ 8,  9, 10, 11]])
'''

np.ndarray.repeat

a
'''
array([[ 4,  5,  6,  7],
       [ 0,  1,  2,  3],
       [ 8,  9, 10, 11]])
'''

a.repeat(2 , axis = 1)   # 每列重复两次
'''
array([[ 4,  4,  5,  5,  6,  6,  7,  7],
       [ 0,  0,  1,  1,  2,  2,  3,  3],
       [ 8,  8,  9,  9, 10, 10, 11, 11]])
'''

np.roll

Parameters
----------
a : array_like
        Input array.
shift : int or tuple of ints
        The number of places by which elements are shifted.  If a tuple,
        then `axis` must be a tuple of the same size, and each of the
        given axes is shifted by the corresponding number.  If an int
        while `axis` is a tuple of ints, then the same value is used for
        all given axes. 负数是反方向移动
axis

a = np.arange(12).reshape(3 , 4)
a
'''
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
'''

np.roll(a , (1 , 2) , axis = (0 , 1))  # shift rows down by 1 and columns right by 2
'''
array([[10, 11,  8,  9],
       [ 2,  3,  0,  1],
       [ 6,  7,  4,  5]])
'''

np.roll(a , (1 , 2)) # no axis given: the array is flattened, rolled by the summed shift (1 + 2 = 3), then reshaped back
'''
array([[ 9, 10, 11,  0],
       [ 1,  2,  3,  4],
       [ 5,  6,  7,  8]])
'''

np.roll(a , (-2 , 2))  # the shifts cancel out (-2 + 2 = 0), so nothing moves
'''
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
'''

73. Consider a set of 10 triplets describing 10 triangles (with shared vertices), find the set of unique line segments composing all the triangles (★★★) 

题意：使用10个三元数的集合描述10个三角形，找出组成这些三角形边的集合

import numpy as np
a = np.random.randint(0 , 100 , (10 , 3))             # 10 triangles, 3 vertex ids each
# Repeating every vertex twice and rolling left by one turns (v0 , v1 , v2)
# into (v0 , v1 , v1 , v2 , v2 , v0), i.e. the three edges laid end to end.
b = np.roll(a.repeat(2 , axis = 1) , -1 , axis = 1)
b = b.reshape(len(b) * 3 , 2)                         # one edge (pair of vertices) per row
# A line segment has no direction: order the two endpoints so that (p , q)
# and (q , p) become the same row before deduplicating.
b = np.sort(b , axis = 1)
b = b.view(dtype = [('p0' , b.dtype) , ('p1' , b.dtype)]) # view each row as one (p0 , p1) record
c = np.unique(b)
c

'''
array([( 6, 57), ( 6, 84), (10, 22), (10, 30), (11, 21), (11, 40),
       (11, 41), (11, 87), (21, 42), (21, 65), (21, 87), (22, 30),
       (40, 41), (42, 65), (45, 58), (45, 93), (51, 86), (51, 88),
       (52, 70), (52, 75), (53, 82), (53, 93), (57, 84), (58, 93),
       (62, 66), (62, 97), (66, 97), (70, 75), (82, 93), (86, 88)],
      dtype=[('p0', '<i4'), ('p1', '<i4')])
'''

np.repeat

Parameters
----------
a : array_like
       Input array.
repeats : int or array of ints
       The number of repetitions for each element.  `repeats` is broadcasted
        to fit the shape of the given axis.
axis : int, optional
       The axis along which to repeat values.  By default, use the
        flattened input array, and return a flat output array.

np.repeat(np.arange(5) , [1 , 2 , 1 , 2 , 0])
'''
array([0, 1, 1, 2, 3, 3])
'''
np.repeat(np.arange(5) , 2)
'''
array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
'''

74. Given an array C that is a bincount, how to produce an array A such that np.bincount(A) == C? (★★★)

c = np.bincount([1,1,2,3,4,4,6])
print(c)
'''
[0 2 1 1 2 0 1]
'''
a = np.repeat(np.arange(len(c)) , repeats = c)
a
'''
array([1, 1, 2, 3, 4, 4, 6])
'''

np.cumsum

Return the cumulative sum of the elements along a given axis. 即前缀和

a = np.arange(5)
np.cumsum(a , dtype = 'float64')
'''
array([ 0.,  1.,  3.,  6., 10.])
'''

75. How to compute averages using a sliding window over an array? (★★★)

def Sliding_Window(a , n):
    """Return the mean of every length-`n` window of the 1-D sequence `a`.

    The result is a float64 array with len(a) - n + 1 entries; entry k is the
    average of a[k : k + n].
    """
    prefix = np.cumsum(a , dtype = 'float64')   # prefix[i] = a[0] + ... + a[i]
    totals = prefix.copy()
    # From index n on, a window sum is the difference of two prefix sums.
    totals[n:] = prefix[n:] - prefix[:-n]
    # The first complete window ends at index n - 1.
    return totals[n - 1:] / n

a = np.arange(20)
print(Sliding_Window(a , 3))
'''
[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18.]
'''

stride_tricks.as_strided

有关stride_tricks.as_strided 的用法，参考:https://zhuanlan.zhihu.com/p/64933417

76. Consider a one-dimensional array Z, build a two-dimensional array whose first row is (Z[0],Z[1],Z[2]) and each subsequent row is shifted by 1 (last row should be (Z[-3],Z[-2],Z[-1]) (★★★)

from numpy.lib import stride_tricks

def rolling(a , n):
    """Return all length-`n` sliding windows of the 1-D array `a` as rows.

    Row k of the result is a[k : k + n], so the shape is (a.size - n + 1 , n).
    The result is a strided view that shares memory with `a`; treat it as
    read-only, because overlapping windows alias the same elements.
    """
    a = np.asarray(a)
    # Use the array's real stride rather than its itemsize, so that
    # non-contiguous inputs (e.g. a[::2]) are windowed correctly.
    step = a.strides[0]
    shape = (a.size - n + 1 , n)
    return stride_tricks.as_strided(a , shape , (step , step))

rolling(np.arange(10) , 3)
'''
array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4],
       [3, 4, 5],
       [4, 5, 6],
       [5, 6, 7],
       [6, 7, 8],
       [7, 8, 9]])
'''

negate a boolean，将布尔数组取反

使用 np.logical_not

1
2
3

Z = np.random.randint(0,2,100)
print(Z)
np.logical_not(Z, out=Z)   # out = Z  保证了 改变原来的数组

change the sign of a float inplace

使用np.negative

Z = np.random.uniform(-1.0,1.0,10)
print(Z)
np.negative(Z, out=Z)
Z

计算点到直线的距离

可以利用向量积来求，由于向量积是两个向量所围成的平行四边形的面积，所以可以利用面积/底边来求高，这个高即是点到直线的距离

np.cross 是求向量的向量积，有关向量积的内容参考：https://www.bilibili.com/video/av6341515

78. Consider 2 sets of points P0,P1 describing lines (2d) and a point p, how to compute distance from p to each line i (P0[i],P1[i])? (★★★)

p0 = np.random.uniform(-10 , 10 , (10 , 2))  # one point on each of the 10 lines
p1 = np.random.uniform(-10 , 10 , (10 , 2))  # a second point on each line
p = np.random.uniform(-10 , 10 , (1 , 2))    # the query point

# The 2-D cross product u_x * v_y - u_y * v_x is the signed area of the
# parallelogram spanned by u and v. It is written out explicitly because
# np.cross on 2-component vectors is deprecated in NumPy 2.
u = p0 - p
v = p1 - p
area = np.abs(u[: , 0] * v[: , 1] - u[: , 1] * v[: , 0])
dist = area / np.linalg.norm(p0 - p1 , axis = 1)  # distance = area / base length
dist

计算矩阵的秩

先对矩阵进行奇异值分解（SVD)，然后利用 非0奇异值的个数 = 矩阵的秩这一性质来求

有关奇异值分解(SVD)，参考: https://www.cnblogs.com/endlesscoding/p/10033527.html 和 https://blog.csdn.net/weixin_43991178/article/details/104906655

Z = np.random.uniform(0,1,(10,8))
U, S, V = np.linalg.svd(Z) # Singular Value Decomposition
print(S)
rank = np.sum(S > 1e-10)   # 非0 奇异值的个数 = 秩
print(rank)
print(U.shape , V.shape)

np.tensordot

np.tensordot，参考：

https://blog.csdn.net/weixin_28710515/article/details/90230842

86. Consider a set of p matrices with shape (n,n) and a set of p vectors with shape (n,1). How to compute the sum of the p matrix products at once? (result has shape (n,1)) (★★★)

题意：p个(n,n)矩阵和p个(n,1)向量相乘，得到的p个(n,1)向量相加

p , n = 10 , 20
v1 = np.random.randint(1 , 10 , (p , n , n))
v2 = np.random.randint(1 , 10 , (p , n , 1))
# axes = [[0 , 2] , [0 , 1]] pairs v1's axis 0 (p) with v2's axis 0 (p), and
# v1's axis 2 (matrix columns) with v2's axis 1 (vector rows). tensordot sums
# over both pairs: each matrix is multiplied by its vector and the p products are added.
np.tensordot(v1 , v2 , axes = [[0 , 2] , [0 , 1]])  # result shape: (n , 1)

np.argpartition

np.argpartition: 第一个参数 a 如果传入a，则按升序划分；如果传入-a，则相当于按降序划分。第二个参数kth表示要选出的前k个元素：函数会把这前k个元素的索引排到前面，但并不会对它们排序，只是把它们筛选出来。返回值是数组的索引。

a = np.random.randint(0 , 10 , (2 , 20))
a
'''
array([[7, 9, 1, 9, 1, 8, 6, 5, 5, 1, 8, 5, 3, 2, 2, 8, 2, 1, 7, 5],
       [6, 6, 7, 0, 7, 8, 0, 2, 2, 9, 1, 3, 1, 5, 2, 1, 0, 3, 0, 0]])
'''
b = np.argpartition(-a , kth = 5 , axis = 1)
print(b)
'''
[[ 3  1  5 15 10  0 18 11  6 19  8  7 12 13 14  4 16 17  2  9]
 [ 5  9  2  1  4  0  6  7  8  3 10 11 12 13 14 15 16 17 18 19]]
'''

89. How to get the n largest values of an array (★★★)

# 先将数组降序排列，然后选出前n 个元素
a = np.arange(1000)
np.random.shuffle(a)
a[np.argpartition(-a , kth = 10 , axis = 0)][:10]
'''
array([997, 998, 995, 996, 999, 994, 992, 993, 991, 990])
'''

记录数组和结构数组

记录数组和结构数组的定义和区别，参考：https://blog.csdn.net/qq_27825451/article/details/102457045

91. How to create a record array from a regular array? (★★★)
# 将普通数组转为记录数组
Z = np.array([("Hello", 2.5, 3),
              ("World", 3.6, 2)])

# Z.T holds one row per future column; fromarrays converts each row to the
# matching format. np.rec is the public entry point; np.core.records is a
# private path that is deprecated in NumPy 2.
R = np.rec.fromarrays(Z.T,
                      names='col1, col2, col3',
                      formats = 'S8, f8, i8')
print(R)
print(Z)
print(Z.T)
print(R.col1) # record arrays expose their fields as attributes

'''
[(b'Hello', 2.5, 3) (b'World', 3.6, 2)]

[['Hello' '2.5' '3']
 ['World' '3.6' '2']]

[['Hello' 'World']
 ['2.5' '3.6']
 ['3' '2']]

[b'Hello' b'World']
'''

np.unpackbits

将uint8（8位无符号整型）数组的每个元素展开成二进制位

95. Convert a vector of ints into a matrix binary representation (★★★)

I = np.array([0, 1, 2, 4, 8, 16, 32, 64, 128], dtype=np.uint8)

print(np.unpackbits(I[:, np.newaxis], axis=1))  # 将一个8位长整形元素展开成二进制形式

'''
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 1 0 0]
 [0 0 0 0 1 0 0 0]
 [0 0 0 1 0 0 0 0]
 [0 0 1 0 0 0 0 0]
 [0 1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0]]
'''

如何判断一个1d的array的所有元素都相等

a = np.array([1] * 5)
np.all(a[1:] == a[:-1])  # a[1:] == a[:-1] compares each element with its predecessor (1st vs 2nd, 2nd vs 3rd, ..., (n-1)-th vs n-th); all True means every element is equal, any False means they are not all equal.
'''
True
'''

94. Considering a 10x3 matrix, extract rows with unequal values (e.g. [2,2,3]) (★★★)

题意：筛选出不是所有元素都相等的行

a = np.random.randint(0 , 4 , (10 , 3))
a
'''
array([[2, 0, 0],
       [0, 2, 3],
       [0, 3, 2],
       [2, 3, 3],
       [2, 0, 3],
       [3, 1, 0],
       [1, 2, 3],
       [0, 3, 0],
       [2, 3, 0],
       [1, 1, 1]])
'''
a[~np.all(a[: , 1:] == a[: , :-1] , axis = 1)]
'''
array([[2, 0, 0],
       [0, 2, 3],
       [0, 3, 2],
       [2, 3, 3],
       [2, 0, 3],
       [3, 1, 0],
       [1, 2, 3],
       [0, 3, 0],
       [2, 3, 0]])
'''

np.indices

np.indices函数的作用是返回一个代表网格中所有序号的矩阵（给定shape）。

详细，参考：https://blog.csdn.net/isunLt/article/details/107620828

90. Given an arbitrary number of vectors, build the cartesian product (every combinations of every item) (★★★)

题意：求笛卡尔积

求笛卡尔积，比如有三个array，分别是：[1 , 2 , 3] , [5 , 6] , [7 , 8] , 那么我们可以用下标来表示每一个笛卡尔积，那么就是从(0 , 0 , 0)一直到(2 , 1 , 1)，这就类似于网格坐标，所以我们可以先求网格坐标。

def cartesian(arrays):
    """Return the cartesian product of any number of 1-D sequences.

    Parameters
    ----------
    arrays : sequence of array_like
        The 1-D input sequences.

    Returns
    -------
    ndarray of shape (len(a0) * len(a1) * ..., len(arrays))
        One combination per row, ordered with the last input varying fastest.
        The dtype is the common type of the inputs, so float values are kept.
    """
    arrays = [np.asarray(x) for x in arrays]
    if not arrays:
        # The product of zero sets is a single empty combination.
        return np.empty((1 , 0) , dtype = int)
    shape = tuple(x.shape[0] for x in arrays)
    # Every row of `ix` is one multi-index into the inputs, like a grid coordinate.
    ix = np.indices(shape , dtype = int).reshape(len(arrays) , -1).T
    out = np.empty(ix.shape , dtype = np.result_type(*arrays))
    for col , arr in enumerate(arrays):
        out[: , col] = arr[ix[: , col]]    # replace each index by the value it selects
    return out
print(cartesian([[1 , 2 , 3] , [4 , 5] , [6 , 7]]))

如何实现等距采样

等距采样就是每隔相等的距离采一次样。

可以采用一维线性插值法来获取不同的距离所对应的样本的值，一维线性插值法的函数：np.interp，详细，参考：https://blog.csdn.net/hfutdog/article/details/87386901

98. Considering a path described by two vectors (X,Y), how to sample it using equidistant samples (★★★)?

题意：给定一条路径的(x , y)，让你等距采样出(x , y)点

import matplotlib.pyplot as plt
phi = np.arange(0,10*np.pi,0.1)
a =1
x = a*phi*np.cos(phi)
y = a*phi*np.sin(phi)
print(x.shape)
dr = (np.diff(x)**2 + np.diff(y)**2)**.5    # np.diff 计算 后一个元素减去前一个元素，这一步计算的是相邻两个点的距离
r = np.zeros_like(x)
r[1:] = np.cumsum(dr)
print(r)
r_int = np.linspace(0, r.max(), 80)    # 创建等差数列
x_int = np.interp(r_int, r, x)  #插值   一维线性插值，
y_int = np.interp(r_int, r, y)  #插值
plt.subplot(133)
plt.plot(x , y , x_int,y_int)

np.diff

后一个元素减去前一个元素

np.diff(np.array([1 , 2 , 3 , 4]))
'''
array([1, 1, 1])
'''

np.inner

返回两个向量的内积

np.inner(np.array([1 , 2 , 3]) , np.array([1 , 2 , 3]))

'''
14
'''

ndarray 的 slice 切片操作

a = np.arange(25).reshape(5 , 5)
# One slice object per axis, passed as a tuple: identical to a[0:3 , 0:3].
# Wrapping the slices in a list is no longer accepted by current NumPy.
a[slice(0 , 3) , slice(0 , 3)]
'''
array([[ 0,  1,  2],
       [ 5,  6,  7],
       [10, 11, 12]])
'''

x.__setitem__(i, y) <==> x[i]=y

85. Create a 2D array subclass such that Z[i,j] == Z[j,i] (★★★)

题意：让我们创造出来的array不仅一开始的属性是对称矩阵，而且修改后也能是对称矩阵。

class Symetric(np.ndarray):
    """Square ndarray subclass whose assignments keep it symmetric.

    x[i] = y is the same call as x.__setitem__(i, y). ndarray's own
    __setitem__ writes only the addressed elements; this override writes the
    mirrored elements as well, so Z[i, j] == Z[j, i] still holds afterwards.
    """

    def __setitem__(self, index, value):
        # Normal assignment first.
        super(Symetric, self).__setitem__(index, value)
        # Repeat the same assignment on the transposed view of the same
        # memory. For a pair (i, j) this writes element (j, i); it also
        # mirrors row, slice and mask assignments, which a plain
        # `i, j = index` unpacking cannot handle.
        self.view(np.ndarray).T[index] = value

def symetric(Z):
    """Build a symmetric matrix from the square array `Z` as a `Symetric` view.

    Z + Z.T counts the diagonal twice, so one copy of the diagonal is
    subtracted. A plain array would only be symmetric at this moment; viewing
    it as `Symetric` means later element assignments go through that class.
    """
    diagonal_part = np.diag(Z.diagonal())
    mirrored = Z + Z.T - diagonal_part
    return np.asarray(mirrored).view(Symetric)

a = np.random.randint(0,10,(5,5))
print(a)

'''
[[2 7 2 0 6]
 [4 9 9 2 6]
 [0 3 1 1 6]
 [6 1 8 0 9]
 [8 4 2 2 7]]
'''

S = symetric(a)
S[2,3] = 42
print(S)

'''
[[ 2 11  2  6 14]
 [11  9 12  3 10]
 [ 2 12  1 42  8]
 [ 6  3 42  0 11]
 [14 10  8 11  7]]
'''

ndarray 子类实体的创建

参考：https://blog.csdn.net/SAKURASANN/article/details/102750468

63. Create an array class that has a name attribute (★★☆)

# python中cls代表的是类的本身，相对应的self则是类的一个实例对象。
class NamedArray(np.ndarray):
    """ndarray subclass that carries a `name` attribute.

    Parameters
    ----------
    array : array_like
        Data to wrap; it is converted with np.asarray and viewed as this class.
    name : str, optional
        Label stored on the instance (default "no name").
    """

    # `cls` is the class itself, while `self` is an instance of it.
    def __new__(cls, array, name="no name"):
        # __new__ creates the instance: view the data as this subclass.
        obj = np.asarray(array).view(cls)
        obj.name = name
        return obj

    def __array_finalize__(self, obj):
        # NumPy calls this for every new instance, including slices and views
        # that never pass through __new__. Copy the name from the source
        # object so that derived arrays keep it.
        self.name = getattr(obj, 'name', "no name")

Z = NamedArray(np.arange(10), "lzclzc")
print (Z.name , type(Z) , Z)

__new__

参考：