NumPy快速入门指南

Joe.Ye • 2023-04-01 • AI

简介

NumPy系统是Python的一种开源的数值计算扩展。这种工具可用来存储和处理大型矩阵。

参考：https://docs.scipy.org/doc/numpy/user/quickstart.html
100 numpy exercises：http://www.labri.fr/perso/nrougier/teaching/numpy.100/
试验性的Numpy教程：http://reverland.org/python/2012/08/22/numpy
From Python to Numpy：http://www.labri.fr/perso/nrougier/from-python-to-numpy/

快速入门指南

# -*- coding: utf-8 -*-
import numpy as np
from numpy import pi
from numpy import newaxis

'''
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
<class 'numpy.ndarray'>
2
(3, 5)
15
int32
4
<memory at 0x000001D910BB6DC8>
'''
def numpyBasic():
    """Print a 3x5 integer array followed by its basic ndarray attributes."""
    grid = np.arange(15).reshape(3, 5)
    print(grid)
    # Large arrays are summarised when printed: the middle is elided and only
    # the corners are shown.
    #   - print every element:      np.set_printoptions(threshold=sys.maxsize)
    #     (the older ``threshold=np.nan`` spelling is rejected by recent NumPy)
    #   - restore the default:      np.set_printoptions(threshold=1000)
    #   - float digits (default 8): np.set_printoptions(precision=4)
    print(type(grid))
    attribute_names = (
        'ndim',      # number of axes (the rank)
        'shape',     # size along each axis; a tuple whose length equals ndim
        'size',      # total number of elements
        'dtype',     # element type
        'itemsize',  # size of one element in bytes
        'data',      # buffer holding the raw elements (rarely needed;
                     # elements are normally reached through indexing)
    )
    for attribute in attribute_names:
        print(getattr(grid, attribute))

def numpyCreateArray():
    """Print arrays built with each of the common NumPy constructors.

    Covers ``np.array`` (with and without an explicit dtype), ``np.zeros``,
    ``np.ones``, ``np.empty``, ``np.arange`` and ``np.linspace``.
    Returns nothing; every result is written to stdout.
    """
    # From plain Python lists or tuples; the dtype is inferred from the values.
    a1 = np.array([2, 3, 4])
    print(a1)
    print(a1.dtype)
    a2 = np.array([1.2, 3.5, 5.1])
    print(a2.dtype)
    # A sequence of equal-length sequences becomes a 2-D array.  The rows must
    # have the same length: a ragged input such as [(1.5, 2.3), (4, 5, 6)]
    # raises ValueError on current NumPy releases.
    a3 = np.array([(1.5, 2, 3), (4, 5, 6)])
    print(a3)
    print(a3.dtype)
    # The element type can be fixed explicitly at creation time.
    a4 = np.array([[1, 2], [3, 4]], dtype=complex)
    print(a4)
    # zeros: array filled with 0.
    a5 = np.zeros((3, 4))
    print(a5)
    # ones: array filled with 1.
    a6 = np.ones((3, 4))
    print(a6)
    # empty: memory is left uninitialised, so the printed values are arbitrary.
    a7 = np.empty((2, 5))
    print(a7)
    # arange is the array counterpart of range(start, stop, step) ...
    a8 = np.arange(10, 30, 5)
    print(a8)
    # ... and also accepts float steps, although the element count is then
    # hard to predict because of floating-point rounding.
    a9 = np.arange(0, 2, 0.3)
    print(a9)
    # linspace takes the number of points instead of the step.
    a10 = np.linspace(0, 2, 9)
    print(a10)
    # Typical use of linspace: sample a function at many points.  The result
    # is intentionally not printed (100 values).
    x = np.linspace(0, 2 * pi, 100)
    sine_samples = np.sin(x)

# 数组的算术运算会自动作用于每个元素，并返回一个新的数组
def numpyBaseAlgorithm():
    """Print the results of four element-wise operations on 1-D arrays."""
    values = np.array([20, 30, 40, 50])
    offsets = np.arange(4)
    # Each operator works element by element and produces a new array.
    print(values - offsets)     # subtraction
    print(offsets ** 2)         # power
    print(10 * np.sin(values))  # scalar times a ufunc result
    print(values < 35)          # comparison yields a boolean array

# *返回的是每个元素相乘的结果, 要实现矩阵乘法, 需要使用dot函数
def numpyMatrixAlgorithm():
    """Print the element-wise product and the matrix product of two 2x2 arrays."""
    left = np.array([[1, 1], [0, 1]])
    right = np.array([[2, 0], [3, 4]])
    # ``*`` multiplies matching positions; it is not the matrix product.
    print(left * right)
    # Matrix product, method form.
    print(left.dot(right))
    # Matrix product, function form.
    print(np.dot(left, right))

# 一些操作, 如+=和*=是直接修改原有的数组, 而不是新建一个
def numpyMatrixAlgorithmSelf():
    """Show that ``+=`` updates an existing array in place.

    Prints an integer array of ones, a random float array, both dtypes, and
    the float array after the integer array has been added into it.
    """
    int_ones = np.ones((2, 3), dtype=int)
    print(int_ones)
    random_floats = np.random.random((2, 3))
    print(random_floats)
    for array in (int_ones, random_floats):
        print(array.dtype)
    # In-place add: the float array keeps its buffer and receives the sum.
    random_floats += int_ones
    print(random_floats)
    # The opposite direction (adding the float array into the integer one
    # with +=) is deliberately not executed: the float result cannot be
    # stored back into an integer array and NumPy raises a casting error.

# 当不同类型的数组运算操作时, 总是向精度更高的自动转换
def numpyMatrixAlgorithmPrecision():
    """Print results and dtypes of mixed-type arithmetic (upcasting)."""
    int_ones = np.ones(3, dtype=np.int32)
    angles = np.linspace(0, np.pi, 3)
    # int32 + float64: the result takes the more precise of the two types.
    shifted = int_ones + angles
    print(shifted)
    print(shifted.dtype)
    # Multiplying by 1j moves the computation into the complex domain.
    phases = np.exp(shifted * 1j)
    print(phases)
    print(phases.dtype)

# ndarray包含了很多一元运算. 如求和等
def numpyMatrixAlgorithmUnary():
    """Print a 3x5 array and its sum, minimum and maximum over all elements."""
    table = np.arange(15).reshape(3, 5)
    print(table)
    # Without an axis argument these reductions treat the array as flat.
    for reduction in (table.sum, table.min, table.max):
        print(reduction())

# 默认情况下, 数组操作都是作用于每一个元素, 而不管它的维度. 但是, 我们也可以通过axis参数来限定操作的轴
def numpyMatrixAlgorithmAxis():
    """Print three reductions of a 3x4 array restricted to a single axis."""
    table = np.arange(12).reshape(3, 4)
    # axis=0 collapses the rows: one sum per column.
    print(table.sum(axis=0))
    # axis=1 collapses the columns: one minimum per row.
    print(table.min(axis=1))
    # Running total along each row.
    print(table.cumsum(axis=1))

# Numpy提供了很多常见的数学上的运算, 如sin, cos, exp. 在Numpy中, 我们称这些为"universal functions"(ufunc)
def numpyUniversal():
    """Print the results of three universal functions applied to ``[0, 1, 2]``."""
    base = np.arange(3)
    # Universal functions (ufuncs) operate element by element.
    print(np.exp(base))
    print(np.sqrt(base))
    print(np.add(base, base))

'''
int32
[  0   1   8  27  64 125 216 343 512 729]
8
[ 8 27 64]
[ 0  8 64]
[1000    1 1000   27 1000  125  216  343  512  729]
[ 729  512  343  216  125 1000   27 1000    1 1000]
[1000    1 1000   27 1000  125  216  343  512  729]
9.999999999999998 1.0 9.999999999999998 3.0 9.999999999999998 5.0 5.999999999999999 6.999999999999999 7.999999999999999 8.999999999999998 
'''
# 一维数组的索引，切片，迭代跟普通的Python列表一样
def numpyArrayIndex():
    """Demonstrate indexing, slicing and iteration on a 1-D array.

    Prints the dtype and contents of the cubes of 0..9, a few index and
    slice reads, the array after a strided slice assignment, its reversed
    view, the (unchanged) array again, and finally the cube root of every
    element on one space-separated line.
    """
    a = np.arange(10) ** 3
    print(a.dtype)
    print(a)
    print(a[2])
    print(a[2:5])
    print(a[:6:2])  # same as a[0:6:2]
    # Slice assignment writes into the array itself.
    a[:6:2] = 1000
    print(a)
    print(a[::-1])  # reversed view of a
    # Reading a reversed view leaves the array itself untouched.  (Rebinding
    # the loop variable inside a ``for`` loop would not modify it either,
    # which is why no such loop is used here.)
    print(a)
    # Iterate over the elements directly instead of over index positions.
    for value in a:
        print(value ** (1 / 3.), end=' ')

def f(x, y):
    """Return ``10 * x + y``.

    NOTE(review): a later ``def f(x)`` in this file rebinds the name ``f``,
    so code that looks up ``f`` at call time gets that one-argument version.
    """
    tens = 10 * x
    return tens + y

'''
[[ 0  1  2  3]
 [10 11 12 13]
 [20 21 22 23]
 [30 31 32 33]
 [40 41 42 43]]
23
[ 1 11 21 31 41]
[ 1 11 21 31 41]
[[10 11 12 13]
 [20 21 22 23]]
'''
def numpyArrayMatrix():
    """Demonstrate indexing and slicing on a 2-D array.

    Builds a 5x4 array whose element at (row, col) is ``10 * row + col`` and
    prints it together with a single element, one column (two spellings),
    a block of rows, and the last row.
    """
    # The generator is defined locally on purpose: the module-level name
    # ``f`` is rebound further down the file to a one-argument function, so
    # passing ``f`` to fromfunction would fail with a TypeError.
    def row_col_code(x, y):
        return 10 * x + y

    a = np.fromfunction(row_col_code, (5, 4), dtype=int)
    print(a)
    # help(np.fromfunction)
    print(a[2, 3])      # single element
    print(a[0:5, 1])    # second column, explicit row range
    print(a[:, 1])      # second column, full slice
    print(a[1:3, :])    # second and third rows
    # When fewer indices than axes are given, the missing ones are treated
    # as full slices.
    print(a[-1])        # same as a[-1, :]

# 可以使用...来表示全切片，它代表补全剩下的所有索引
# x[1,2,...]等价于x[1,2,:,:,:]
# x[...,3]等价于x[:,:,:,:,3]
# x[4,...,5,:]等价于x[4,:,:,5,:]
def numpyArrayDot():
    """Print the shape of a 3-D array and two slices taken with ``...``."""
    first_plane = [[0, 1, 2], [10, 12, 13]]
    second_plane = [[100, 101, 102], [110, 112, 113]]
    cube = np.array([first_plane, second_plane])
    print(cube.shape)
    # ``...`` stands for as many full slices as are needed.
    print(cube[1, ...])   # same as cube[1, :, :]
    print(cube[..., 2])   # same as cube[:, :, 2]

'''
[0 1 2 3]
[10 11 12 13]
[20 21 22 23]
[30 31 32 33]
[40 41 42 43]
0 1 2 3 10 11 12 13 20 21 22 23 30 31 32 33 40 41 42 43 
'''
def numpyMatrixEnumerate():
    """Print a 5x4 array row by row, then element by element on one line."""
    table = np.array([
        [0, 1, 2, 3],
        [10, 11, 12, 13],
        [20, 21, 22, 23],
        [30, 31, 32, 33],
        [40, 41, 42, 43],
    ])
    # Iterating a multi-dimensional array walks its first axis, so unpacking
    # the array yields one row per item.
    print(*table, sep='\n')
    # ``flat`` iterates over every single element instead.
    print(*table.flat, end=' ')

'''
(3, 4)
[[0. 8. 3. 0.]
 [5. 2. 3. 5.]
 [4. 6. 2. 3.]]
[0. 8. 3. 0. 5. 2. 3. 5. 4. 6. 2. 3.]
[[0. 8.]
 [3. 0.]
 [5. 2.]
 [3. 5.]
 [4. 6.]
 [2. 3.]]
[[0. 5. 4.]
 [8. 2. 6.]
 [3. 3. 2.]
 [0. 5. 3.]]
(4, 3)
[[0. 8. 3. 0. 5. 2.]
 [3. 5. 4. 6. 2. 3.]]
[[0. 8. 3. 0.]
 [5. 2. 3. 5.]
 [4. 6. 2. 3.]]
'''
def numpyMatrixShape():
    a = np.floor(10 * np.random.random((3, 4)))
    print(a.shape)
    print(a)
    # 返回降维的数组
    print(a.ravel())
    # 直接修改shape
    print(a.reshape(6, 2))
    # 数组转置
    print(a.T)
    print(a.T.shape)
    # reshape返回修改后的数组，不改变数组本身，但是resize函数直接修改原数组
    a.resize((2, 6))
    print(a)
    # 如果一个维度为的是-1, 那么reshape函数会自动计算它的值
    print(a.reshape(3, -1))

'''
[[3. 0.]
 [3. 9.]]
[[9. 4.]
 [8. 1.]]
[[3. 0.]
 [3. 9.]
 [9. 4.]
 [8. 1.]]
[[3. 0. 9. 4.]
 [3. 9. 8. 1.]]
'''
def numpyMatrixCombine():
    """Print two random 2x2 arrays and their vertical and horizontal stacks."""
    # Arrays can be joined along different axes.
    top_left = np.floor(np.random.random((2, 2)) * 10)
    print(top_left)
    other = np.floor(np.random.random((2, 2)) * 10)
    print(other)
    pair = (top_left, other)
    print(np.vstack(pair))  # rows of the second array below the first
    print(np.hstack(pair))  # columns of the second array to the right

'''
[[1. 1. 2. 9.]
 [4. 4. 0. 3.]]
[[4. 3.]
 [2. 8.]]
[[4.]
 [2.]]
[[4. 3.]
 [2. 8.]]
[[4. 3.]
 [2. 8.]]
[1 2 3 0 4]
'''
## 数组合并
def numpyColumnStack():
    """Print the results of ``column_stack``, ``hstack``, ``newaxis`` and ``r_``."""
    left_2d = np.array([[1., 1.], [4., 4.]])
    right_2d = np.array([[2., 9.], [0., 3.]])
    # On 2-D inputs column_stack behaves like hstack.
    print(np.column_stack((left_2d, right_2d)))

    left_1d = np.array([4., 2.])
    right_1d = np.array([3., 8.])
    # On 1-D inputs it returns a 2-D array whose columns are the inputs.
    print(np.column_stack((left_1d, right_1d)))

    # newaxis turns a 1-D array into a 2-D column.
    left_column = left_1d[:, newaxis]
    right_column = right_1d[:, newaxis]
    print(left_column)
    # Both inputs are 2-D now, so column_stack and hstack agree.
    columns = (left_column, right_column)
    print(np.column_stack(columns))
    print(np.hstack(columns))

    # Further notes:
    # - row_stack is equivalent to vstack for any input.
    # - For arrays with more than two dimensions, hstack joins along the
    #   second axis and vstack along the first.
    # - concatenate takes an extra argument naming the axis to join along.
    # - r_ and c_ are handy for building arrays along one axis; they accept
    #   range literals such as 1:4.
    print(np.r_[1:4, 0, 4])

## 数组切割
def numpyMatrixSplit():
    """Print a 2x12 array and three ways of splitting it."""
    first_row = [9., 0., 2., 0., 0., 4., 1., 6., 4., 8., 3., 9.]
    second_row = [5., 3., 0., 5., 5., 8., 0., 5., 6., 3., 8., 7.]
    wide = np.array([first_row, second_row])
    print(wide)
    # Three equally sized pieces along the horizontal axis.
    print(np.hsplit(wide, 3))
    # Two pieces along the vertical axis.  (array_split additionally lets the
    # caller choose the axis.)
    print(np.vsplit(wide, 2))
    # Cut after the third and after the fourth column.
    print(np.hsplit(wide, (3, 4)))

def f(x):
    """Print the identity (``id``) of the object received as ``x``.

    NOTE(review): this rebinds the name ``f`` that an earlier two-argument
    ``def f(x, y)`` in this file also uses.
    """
    object_identity = id(x)
    print(object_identity)

'''
[ 0  1  2  3  4  5  6  7  8  9 10 11]
True
(12,)
(3, 4)
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
2889321266432
2889321266432
- - - - - - - - - - - - - - - - - - - -
False
True
False
(3, 4)
[[   0    1    2    3 1234    5]
 [   6    7    8    9   10   11]]
[[   0    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]
[[ 1  2]
 [ 5  6]
 [ 9 10]]
[[10 10]
 [10 10]
 [10 10]]
[[   0   10   10    3]
 [1234   10   10    7]
 [   8   10   10   11]]
- - - - - - - - - - - - - - - - - - - -
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
False
False
[[9999    1    2    3]
 [   4    5    6    7]
 [   8    9   10   11]]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
'''
# 当进行数组运算和改变数组时，有时候数据是被复制到一个新的数组，有时候不是
def numpyCopy():
    """Contrast plain assignment, views and copies of an array.

    Prints three sections separated by a dashed line: no copy at all,
    views that share data, and an independent copy.
    """
    divider = '- - - - - - - - - - - - - - - - - - - -'

    ## No copy at all
    original = np.arange(12)
    print(original)
    alias = original  # plain assignment creates no new object
    print(alias is original)  # both names refer to the same array
    print(alias.shape)
    alias.shape = (3, 4)  # reshaping through one name is visible through the other
    print(original.shape)
    print(original)
    # Arguments are passed by object reference, so a function call does NOT
    # create a new array: the id printed inside f matches the one printed here.
    print(id(original))
    f(original)

    print(divider)

    ## View (shallow copy)
    # Different array objects can share the same data; view() creates a new
    # array object that looks at the same data.
    owner = np.array([[0, 1, 2, 3],
                      [4, 5, 6, 7],
                      [8, 9, 10, 11]])
    window = owner.view()
    print(window is owner)       # a different object ...
    print(window.base is owner)  # ... that is a view of owner's data
    print(window.flags.owndata)
    window.shape = (2, 6)        # owner's shape is not affected
    print(owner.shape)
    window[0, 4] = 1234          # owner's data is affected
    print(window)
    print(owner)
    # Slicing an array returns a view of it.
    middle = owner[:, 1:3]
    print(middle)
    middle[:] = 10               # writes through the view into owner
    print(middle)
    print(owner)

    print(divider)

    ## Deep copy
    source = np.arange(12).reshape((3, 4))
    print(source)
    duplicate = source.copy()
    print(duplicate is source)
    print(duplicate.base is source)
    duplicate[0, 0] = 9999       # only the copy changes
    print(duplicate)
    print(source)

## 广播机制
def numpyBroadcast():
    """Print an array-times-array product and the equivalent array-times-scalar one."""
    # Broadcasting describes how NumPy treats arrays of different shapes in
    # arithmetic.  Subject to some constraints, the smaller operand is
    # stretched to match the larger one.
    # The ordinary case: both operands already have the same shape.
    values = np.array([1.0, 2.0, 3.0])
    twos = np.array([2.0, 2.0, 2.0])
    print(values * twos)
    # Broadcasting relaxes the same-shape requirement when the shapes are
    # compatible.  The simplest example combines an array with a scalar.
    values = np.array([1.0, 2.0, 3.0])
    factor = 2.0
    print(values * factor)
    # Conceptually the scalar is stretched into an array shaped like
    # ``values`` whose elements are copies of the scalar.  This is only a
    # mental model: NumPy uses the scalar directly without making copies,
    # which makes broadcasting the more memory-efficient form.
    # Rules:
    # 1. The output shape is the maximum of the input shapes along each axis;
    #    the smaller input is repeated along the axes where it is stretched.
    # 2. Two arrays can be broadcast together when their shapes are equal, or
    #    when one of them has size 1 along the axis where they differ.

'''
[  0   1   4   9  16  25  36  49  64  81 100 121]
[1 1 3 8 5]
  print(a[k])  # 等价于a[i, j]
[ 1  1  9 64 25]
[[ 9 16]
 [81 49]]
- - - - - - - - - - - - - - - - - - - -
[[[  0   0   0]
  [255   0   0]
  [  0 255   0]
  [  0   0   0]]

 [[  0   0   0]
  [  0   0 255]
  [255 255 255]
  [  0   0   0]]]
- - - - - - - - - - - - - - - - - - - -
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 2  5]
 [ 7 11]]
[[ 2  6]
 [ 6 10]]
(3, 2, 2)
[[[ 2  1]
  [ 3  3]]

 [[ 6  5]
  [ 7  7]]

 [[10  9]
  [11 11]]]
[array([[0, 1],
       [1, 2]]), array([[2, 1],
       [3, 3]])]
[[ 2  5]
 [ 7 11]]
[[[0 1]
  [1 2]]

 [[2 1]
  [3 3]]]
- - - - - - - - - - - - - - - - - - - -
[0 1 2 3 4]
[0 0 2 0 0]
[0 1 2 3 4]
[2 1 3 3 4]
[0 1 2 3 4]
[1 1 3 3 4]
- - - - - - - - - - - - - - - - - - - -
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[False False False False]
 [False  True  True  True]
 [ True  True  True  True]]
[ 5  6  7  8  9 10 11]
[[0 1 2 3]
 [4 0 0 0]
 [0 0 0 0]]
- - - - - - - - - - - - - - - - - - - -
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 4  5  6  7]
 [ 8  9 10 11]]
[[ 4  5  6  7]
 [ 8  9 10 11]]
[[ 0  2]
 [ 4  6]
 [ 8 10]]
[ 4 10]
'''
## 索引
def numpyIndices():
    """Demonstrate indexing with integer arrays and boolean arrays.

    Prints six sections separated by dashed lines: 1-D fancy indexing,
    palette lookup, multi-axis index arrays, assignment through index
    lists, boolean masks, and per-axis boolean selection.  Returns nothing.
    """
    # Besides ordinary Python indexing and slicing, NumPy accepts arrays of
    # integers and arrays of booleans as indices.
    a = np.arange(12) ** 2
    i = np.array([1, 1, 3, 8, 5])
    print(a)
    print(i)
    print(a[i])  # elements of a at the positions listed in i
    j = np.array([[3, 4], [9, 7]])
    print(a[j])  # a 2-D index array gives a result with the shape of j

    print('- - - - - - - - - - - - - - - - - - - -')

    # On a multi-dimensional array, a single index array selects along the
    # first axis: every entry of ``image`` picks one row of ``palette``.
    palette = np.array([[0, 0, 0],
                        [255, 0, 0],
                        [0, 255, 0],
                        [0, 0, 255],
                        [255, 255, 255]])
    image = np.array([[0, 1, 2, 0],
                      [0, 3, 4, 0]])
    print(palette[image])

    print('- - - - - - - - - - - - - - - - - - - -')

    # One index array per axis is also possible; the index arrays must have
    # the same shape.
    a = np.arange(12).reshape(3, 4)
    print(a)
    i = np.array([[0, 1],
                  [1, 2]])
    j = np.array([[2, 1],
                  [3, 3]])
    print(a[i, j])
    print(a[i, 2])
    b = a[:, j]  # a[0, j], a[1, j], a[2, j]
    print(b.shape)
    print(b)
    # i and j can be collected in a sequence first.  The sequence has to be
    # converted to a tuple when indexing: a *list* of index arrays is treated
    # as one stacked index array for the first axis, which fails here because
    # the value 3 is out of range for an axis of length 3.
    k = [i, j]
    print(k)
    print(a[tuple(k)])  # same as a[i, j]
    s = np.array([i, j])  # stacking them gives a 3-D array
    print(s)

    print('- - - - - - - - - - - - - - - - - - - -')

    # Index lists can also be assignment targets.
    a = np.arange(5)
    print(a)
    a[[1, 3, 4]] = 0
    print(a)
    # When an index is repeated, the position is assigned several times and
    # only the last value survives.
    a = np.arange(5)
    print(a)
    a[[0, 0, 2]] = [1, 2, 3]
    print(a)
    # That is reasonable, but ``+=`` may be surprising: although index 0
    # appears twice, element 0 is incremented only once.
    a = np.arange(5)
    print(a)
    a[[0, 0, 2]] += 1
    print(a)

    print('- - - - - - - - - - - - - - - - - - - -')

    # Boolean array indexing.
    # An integer index array lists which elements to pick; a boolean array
    # states for every element whether it is wanted or not.
    a = np.arange(12).reshape((3, 4))
    print(a)
    b = a > 4
    print(b)
    print(a[b])
    a[b] = 0  # every element greater than 4 becomes 0
    print(a)

    print('- - - - - - - - - - - - - - - - - - - -')

    # 1-D boolean arrays can select along individual axes; their length must
    # match the length of the axis they index.
    a = np.arange(12).reshape(3, 4)
    b1 = np.array([False, True, True])
    b2 = np.array([True, False, True, False])
    print(a)
    print(a[b1, :])  # select rows
    print(a[b1])  # same thing
    print(a[:, b2])  # select columns
    print(a[b1, b2])  # pairs up the selected rows and columns

'''
(2, 3., b'World')
[1 2]
[2. 3.]
[b'Hello' b'World']
'''
## 字符串索引
# Numpy提供了创建结构化的数组的能力，可以通过列名来操作数据
def numpyStringIndices():
    """Print one record and each named column of a small structured array."""
    # A structured dtype gives every field a name, so columns can be read by
    # name instead of by position.
    record_type = [('foo', 'i4'), ('bar', 'f4'), ('baz', 'S10')]
    rows = [(1, 2., 'Hello'), (2, 3., 'World')]
    records = np.array(rows, dtype=record_type)
    print(records[1])  # the second record
    for field in ('foo', 'bar', 'baz'):
        print(records[field])

# Script entry point: only the basic-attributes demo runs by default.
# Uncomment any of the calls below to run that demo as well.
if __name__ == '__main__':
    numpyBasic()
    # numpyCreateArray()
    # numpyBaseAlgorithm()
    # numpyMatrixAlgorithm()
    # numpyMatrixAlgorithmSelf()
    # numpyMatrixAlgorithmPrecision()
    # numpyMatrixAlgorithmUnary()
    # numpyMatrixAlgorithmAxis()
    # numpyUniversal()
    # numpyArrayIndex()
    # numpyArrayMatrix()
    # numpyArrayDot()
    # numpyMatrixEnumerate()
    # numpyMatrixShape()
    # numpyMatrixCombine()
    # numpyColumnStack()
    # numpyMatrixSplit()
    # numpyCopy()
    # numpyBroadcast()
    # numpyIndices()
    # numpyStringIndices()

函数和方法概览

如下是按照分类整理的常用函数和方法，完整的分类可以参考Routines

数组创建

arange
array
copy
empty
empty_like
eye # 创建一个对角线全是1的二维数组
fromfile
fromfunction
identity # 创建一个对角线全是1的方形矩阵，与eye方法差不多，只是可以接受的参数不同
linspace
logspace # 创建等比数列
mgrid
ogrid
ones
ones_like
zeros
zeros_like

转换

ndarray.astype # 改变数组的元素格式
atleast_1d # 将输入转换为至少1维数组
atleast_2d
atleast_3d
mat # 将输入转换为矩阵

处理

array_split
column_stack
concatenate
diagonal
dsplit
dstack
hsplit
hstack
ndarray.item
newaxis
ravel
repeat
reshape
resize
squeeze
swapaxes
take
transpose
vsplit
vstack

Questions

all
any
nonzero
where

排序

argmax # 返回最大值的索引
argmin # 返回最小值的索引
argsort # 返回排序后的索引
max
min
ptp
searchsorted
sort

运算

choose
compress
cumprod
cumsum
inner
ndarray.fill
imag
prod
put
putmask
real
sum

基本统计

cov
mean
std
var

线性代数

cross
dot
outer
linalg.svd
vdot

版权声明：
作者：Joe.Ye
链接：https://www.appblog.cn/index.php/2023/04/01/numpy-quick-start-guide/
来源：APP全栈技术分享
文章版权归作者所有，未经允许请勿转载。

THE END

NumPy

二维码

打赏

海报

NumPy快速入门指南

简介 NumPy系统是Python的一种开源的数值计算扩展。这种工具可用来存储和处理大型矩阵。参考：https://docs.scipy.org/doc/numpy/user/quickstart.html 100 ……

为什么 SQL 语句不要过多的 join？

<<上一篇

NumPy常见运算之min、max、mean、sum、exp、sqrt、sort、乘法、点积、拼接、切分

下一篇>>

文章目录

关闭

搜索内容

NumPy快速入门指南

简介

快速入门指南

函数和方法概览

数组创建

转换

处理

Questions

排序

运算

基本统计

线性代数

取消回复

共有 0 条评论

热门文章

最新评论