python气象信息系统工程 第五章

第5章 栅格数据处理

5.1 xarray与气象栅格数据处理

5.1.3 数据数组

1
2
3
4
5
6
# Quickly create a DataArray from a bare NumPy array. With no dims/coords
# supplied, the dimensions receive the default names dim_0 and dim_1 and
# carry no coordinates (see the printed repr).
import xarray as xr
import numpy as np
data = np.random.rand(4, 3)
foo = xr.DataArray(data)
print(foo)
Warning: ecCodes 2.21.0 or higher is recommended. You are running version 2.14.1
<xarray.DataArray (dim_0: 4, dim_1: 3)>
array([[0.67963145, 0.92665821, 0.20340572],
       [0.24031655, 0.25653271, 0.15787638],
       [0.16017296, 0.76027366, 0.01558841],
       [0.51060631, 0.92658865, 0.37249527]])
Dimensions without coordinates: dim_0, dim_1
1
2
3
4
5
6
7
8
9
# Create a fully specified DataArray: data plus named dimensions and
# coordinate labels for each dimension.
import xarray as xr
import numpy as np
import pandas as pd
data = np.random.rand(4, 3)
# Labels for the "space" dimension
locs = ["level", "latitude", "longitude"]
# Labels for the "time" dimension: four consecutive dates
times = pd.date_range("2000-01-01", periods=4)
# coords is a plain list here, paired positionally with the names in dims
foo = xr.DataArray(data, coords=[times, locs], dims=["time", "space"])
print(foo)
<xarray.DataArray (time: 4, space: 3)>
array([[0.80338641, 0.65603237, 0.52567211],
       [0.50568769, 0.59993651, 0.64384544],
       [0.26838498, 0.58854113, 0.3118957 ],
       [0.75973916, 0.37242213, 0.30407391]])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
  * space    (space) <U9 'level' 'latitude' 'longitude'
1
2
3
4
5
6
7
# A DataArray can also be created from pandas objects such as pd.Series or
# pd.DataFrame; the index/columns names become the dimension names.
# NOTE(review): the original text also lists pd.Panel, which is not available
# in current pandas releases -- confirm against the targeted pandas version.
df = pd.DataFrame({"lat": [0, 1], "lon": [2, 3]}, index=["2000-01-01", "2000-01-02"])
df.index.name = "time"
df.columns.name = "space"
print(df)
foo = xr.DataArray(df)
# The index holds plain strings, so the "time" coordinate has object dtype
print(foo)
space       lat  lon
time                
2000-01-01    0    2
2000-01-02    1    3
<xarray.DataArray (time: 2, space: 2)>
array([[0, 2],
       [1, 3]])
Coordinates:
  * time     (time) object '2000-01-01' '2000-01-02'
  * space    (space) object 'lat' 'lon'
1
2
3
# Set coords as a list of (dimension name, labels) tuples; no separate dims
# argument is needed because each tuple already names its dimension.
foo = xr.DataArray(data, coords=[("time", times), ("space", locs)])
print(foo)
<xarray.DataArray (time: 4, space: 3)>
array([[0.80338641, 0.65603237, 0.52567211],
       [0.50568769, 0.59993651, 0.64384544],
       [0.26838498, 0.58854113, 0.3118957 ],
       [0.75973916, 0.37242213, 0.30407391]])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
  * space    (space) <U9 'level' 'latitude' 'longitude'
1
2
3
# Set coords as a dict mapping dimension name -> labels; dims is passed
# explicitly alongside it.
foo = xr.DataArray(data,coords={"time": times,"space": locs},dims=["time", "space"])
print(foo)
<xarray.DataArray (time: 4, space: 3)>
array([[0.80338641, 0.65603237, 0.52567211],
       [0.50568769, 0.59993651, 0.64384544],
       [0.26838498, 0.58854113, 0.3118957 ],
       [0.75973916, 0.37242213, 0.30407391]])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
  * space    (space) <U9 'level' 'latitude' 'longitude'
1
2
3
# Access a coordinate either by name through .coords or by indexing the
# DataArray itself with the coordinate name; both print the same 'time' array.
print(foo.coords["time"])
print(foo["time"])
<xarray.DataArray 'time' (time: 4)>
array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000',
       '2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000'],
      dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
<xarray.DataArray 'time' (time: 4)>
array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000',
       '2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000'],
      dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
1
2
3
4
5
# Coordinates can be set or deleted with dict-like syntax.
# Overwrite the "time" coordinate labels with a new date range.
foo["time"] = pd.date_range("1999-01-01", periods=4)
print(foo)
# Delete the coordinate: "time" stays a dimension but no longer has labels
del foo["time"]
print(foo)
<xarray.DataArray (time: 4, space: 3)>
array([[0.80338641, 0.65603237, 0.52567211],
       [0.50568769, 0.59993651, 0.64384544],
       [0.26838498, 0.58854113, 0.3118957 ],
       [0.75973916, 0.37242213, 0.30407391]])
Coordinates:
  * time     (time) datetime64[ns] 1999-01-01 1999-01-02 1999-01-03 1999-01-04
  * space    (space) <U9 'level' 'latitude' 'longitude'
<xarray.DataArray (time: 4, space: 3)>
array([[0.80338641, 0.65603237, 0.52567211],
       [0.50568769, 0.59993651, 0.64384544],
       [0.26838498, 0.58854113, 0.3118957 ],
       [0.75973916, 0.37242213, 0.30407391]])
Coordinates:
  * space    (space) <U9 'level' 'latitude' 'longitude'
Dimensions without coordinates: time
1
2
3
4
# Inspect a DataArray attribute: .values exposes the wrapped array data
data = np.random.rand(4, 3)
foo = xr.DataArray(data,coords={"time": times,"space": locs},dims=["time", "space"])
print(foo.values)
[[0.02511101 0.39750902 0.60358049]
 [0.95935949 0.49043908 0.17389166]
 [0.58800593 0.65861825 0.61909722]
 [0.80189432 0.57056813 0.97623271]]
1
2
3
4
5
6
# Modify the .values attribute of a DataArray
data = np.random.rand(4, 3)
foo = xr.DataArray(data,coords={"time": times,"space": locs},dims=["time", "space"])
print(foo.values)  # before
# Assigning to .values replaces the data; here every element is doubled
foo.values = 2 * foo.values
print(foo.values)  # after
[[0.94464301 0.18733999 0.10886215]
 [0.43506913 0.76269183 0.57350526]
 [0.91382268 0.04468623 0.76367773]
 [0.44477094 0.56019484 0.07125937]]
[[1.88928601 0.37467998 0.21772431]
 [0.87013826 1.52538366 1.14701051]
 [1.82764537 0.08937247 1.52735547]
 [0.88954188 1.12038969 0.14251874]]
1
2
3
4
5
6
# Fill in the optional metadata of a DataArray: its name and its attrs
data = np.random.rand(4, 3)
foo = xr.DataArray(data,coords={"time": times,"space": locs},dims=["time", "space"])
foo.name = "foo"
# attrs takes arbitrary key/value metadata; it shows up under "Attributes:"
foo.attrs["units"] = "meters"
print(foo)
<xarray.DataArray 'foo' (time: 4, space: 3)>
array([[0.86866017, 0.43983313, 0.11170208],
       [0.13890405, 0.35426694, 0.34257437],
       [0.89854998, 0.2483747 , 0.07325485],
       [0.38812121, 0.73746852, 0.75192879]])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
  * space    (space) <U9 'level' 'latitude' 'longitude'
Attributes:
    units:    meters
1
2
3
# Change a DataArray's name with rename(); the renamed array is bound to a
# new variable and keeps the data, coordinates and attrs of foo.
foo_new = foo.rename("foo_new")
print(foo_new)
<xarray.DataArray 'foo_new' (time: 4, space: 3)>
array([[0.86866017, 0.43983313, 0.11170208],
       [0.13890405, 0.35426694, 0.34257437],
       [0.89854998, 0.2483747 , 0.07325485],
       [0.38812121, 0.73746852, 0.75192879]])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
  * space    (space) <U9 'level' 'latitude' 'longitude'
Attributes:
    units:    meters

5.1.4 数据集

1
2
3
4
5
6
7
8
9
10
11
12
# Create a Dataset from randomly generated arrays
temp = 15 + 8 * np.random.randn(2, 2, 3)
precip = 10 * np.random.rand(2, 2, 3)
lon = [[-99.83, -99.32], [-99.79, -99.23]]
lat = [[42.25, 42.21], [42.63, 42.59]]

# Each data variable is a (dimension names, values) pair
data_vars = {
    "temperature": (["x", "y", "time"], temp),
    "precipitation": (["x", "y", "time"], precip),
}
# lon/lat are 2-D coordinates on (x, y); reference_time is a scalar coordinate
coords = {
    "lon": (["x", "y"], lon),
    "lat": (["x", "y"], lat),
    "time": pd.date_range("2014-09-06", periods=3),
    "reference_time": pd.Timestamp("2014-09-05"),
}
ds = xr.Dataset(data_vars, coords=coords)
print(ds)
<xarray.Dataset>
Dimensions:         (x: 2, y: 2, time: 3)
Coordinates:
    lon             (x, y) float64 -99.83 -99.32 -99.79 -99.23
    lat             (x, y) float64 42.25 42.21 42.63 42.59
  * time            (time) datetime64[ns] 2014-09-06 2014-09-07 2014-09-08
    reference_time  datetime64[ns] 2014-09-05
Dimensions without coordinates: x, y
Data variables:
    temperature     (x, y, time) float64 13.74 20.58 19.62 ... 9.004 24.93 17.66
    precipitation   (x, y, time) float64 5.553 1.356 8.003 ... 3.666 6.822 8.288
1
2
3
4
5
6
7
# Create a Dataset by passing a DataArray: the dict key ("bar") becomes the
# variable name and the DataArray's coordinates become Dataset coordinates.
data = np.random.rand(4, 3)
locs = ["IA", "IL", "IN"]
times = pd.date_range("2000-01-01", periods=4)
foo = xr.DataArray(data, coords=[times, locs], dims=["time", "space"])
ds = xr.Dataset({"bar": foo})
print(ds)
<xarray.Dataset>
Dimensions:  (time: 4, space: 3)
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
  * space    (space) <U2 'IA' 'IL' 'IN'
Data variables:
    bar      (time, space) float64 0.585 0.456 0.2301 ... 0.5278 0.7906 0.7462
1
2
3
4
5
6
7
# Create a Dataset from a pandas object: the DataArray is first converted
# with to_pandas() and the result is passed as the variable "bar".
data = np.random.rand(4, 3)
locs = ["IA", "IL", "IN"]
times = pd.date_range("2000-01-01", periods=4)
foo = xr.DataArray(data, coords=[times, locs], dims=["time", "space"])
# After the pandas round trip the "space" labels are stored with object dtype
ds = xr.Dataset({"bar": foo.to_pandas()})
print(ds)
<xarray.Dataset>
Dimensions:  (time: 4, space: 3)
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
  * space    (space) object 'IA' 'IL' 'IN'
Data variables:
    bar      (time, space) float64 0.05036 0.9936 0.451 ... 0.2731 0.4817 0.2769
1
2
3
4
5
6
7
8
9
10
11
12
# Access a variable stored in a Dataset
temp = 15 + 8 * np.random.randn(2, 2, 3)
precip = 10 * np.random.rand(2, 2, 3)
lon = [[-99.83, -99.32], [-99.79, -99.23]]
lat = [[42.25, 42.21], [42.63, 42.59]]

ds = xr.Dataset(
    {
        "temperature": (["x", "y", "time"], temp),
        "precipitation": (["x", "y", "time"], precip),
    },
    coords={
        "lon": (["x", "y"], lon),
        "lat": (["x", "y"], lat),
        "time": pd.date_range("2014-09-06", periods=3),
        "reference_time": pd.Timestamp("2014-09-05"),
    },
)

# Dict-style lookup returns the variable as a DataArray, coordinates included
temperature_var = ds["temperature"]
print(temperature_var)
<xarray.DataArray 'temperature' (x: 2, y: 2, time: 3)>
array([[[15.03349336, 15.44033513,  6.51238387],
        [19.54120058, -3.60023195, 21.78888719]],

       [[14.36421838, 25.00615749, -7.38567232],
        [16.29567998, 22.08970341,  3.72752459]]])
Coordinates:
    lon             (x, y) float64 -99.83 -99.32 -99.79 -99.23
    lat             (x, y) float64 42.25 42.21 42.63 42.59
  * time            (time) datetime64[ns] 2014-09-06 2014-09-07 2014-09-08
    reference_time  datetime64[ns] 2014-09-05
Dimensions without coordinates: x, y
1
2
3
4
5
6
7
8
9
10
# Build a complete Dataset step by step with dict-like syntax.
# Relies on temp, precip, lat and lon defined in the previous example.
ds = xr.Dataset()
# Data variables: assign a (dimension names, values) tuple under a new key
ds["temperature"] = (("x", "y", "time"), temp)
ds["temperature_double"] = (("x", "y", "time"), temp * 2)
ds["precipitation"] = (("x", "y", "time"), precip)
# Coordinates go through ds.coords so they are not stored as data variables
ds.coords["lat"] = (("x", "y"), lat)
ds.coords["lon"] = (("x", "y"), lon)
ds.coords["time"] = pd.date_range("2014-09-06", periods=3)
ds.coords["reference_time"] = pd.Timestamp("2014-09-05")
print(ds)
<xarray.Dataset>
Dimensions:             (x: 2, y: 2, time: 3)
Coordinates:
    lat                 (x, y) float64 42.25 42.21 42.63 42.59
    lon                 (x, y) float64 -99.83 -99.32 -99.79 -99.23
  * time                (time) datetime64[ns] 2014-09-06 2014-09-07 2014-09-08
    reference_time      datetime64[ns] 2014-09-05
Dimensions without coordinates: x, y
Data variables:
    temperature         (x, y, time) float64 15.03 15.44 6.512 ... 22.09 3.728
    temperature_double  (x, y, time) float64 30.07 30.88 13.02 ... 44.18 7.455
    precipitation       (x, y, time) float64 1.99 7.872 2.105 ... 5.925 0.6317
1
2
3
4
5
6
7
8
9
10
11
12
# Remove a variable from a Dataset with drop_vars().
# Relies on temp, precip, lat and lon defined in an earlier example.
ds = xr.Dataset()
ds["temperature"] = (("x", "y", "time"), temp)
ds["temperature_double"] = (("x", "y", "time"), temp * 2)
ds["precipitation"] = (("x", "y", "time"), precip)
ds.coords["lat"] = (("x", "y"), lat)
ds.coords["lon"] = (("x", "y"), lon)
ds.coords["time"] = pd.date_range("2014-09-06", periods=3)
ds.coords["reference_time"] = pd.Timestamp("2014-09-05")
print(ds)
# drop_vars() is the non-deprecated replacement for Dataset.drop() when
# removing variables by name. It returns a new Dataset, hence the rebinding.
ds = ds.drop_vars("temperature")
print(ds)
<xarray.Dataset>
Dimensions:             (x: 2, y: 2, time: 3)
Coordinates:
    lat                 (x, y) float64 42.25 42.21 42.63 42.59
    lon                 (x, y) float64 -99.83 -99.32 -99.79 -99.23
  * time                (time) datetime64[ns] 2014-09-06 2014-09-07 2014-09-08
    reference_time      datetime64[ns] 2014-09-05
Dimensions without coordinates: x, y
Data variables:
    temperature         (x, y, time) float64 15.03 15.44 6.512 ... 22.09 3.728
    temperature_double  (x, y, time) float64 30.07 30.88 13.02 ... 44.18 7.455
    precipitation       (x, y, time) float64 1.99 7.872 2.105 ... 5.925 0.6317
<xarray.Dataset>
Dimensions:             (x: 2, y: 2, time: 3)
Coordinates:
    lat                 (x, y) float64 42.25 42.21 42.63 42.59
    lon                 (x, y) float64 -99.83 -99.32 -99.79 -99.23
  * time                (time) datetime64[ns] 2014-09-06 2014-09-07 2014-09-08
    reference_time      datetime64[ns] 2014-09-05
Dimensions without coordinates: x, y
Data variables:
    temperature_double  (x, y, time) float64 30.07 30.88 13.02 ... 44.18 7.455
    precipitation       (x, y, time) float64 1.99 7.872 2.105 ... 5.925 0.6317
1
2
3
4
5
6
7
8
9
10
11
12
# Add or replace Dataset variables with assign().
# Relies on temp, precip, lat and lon defined in an earlier example.
ds = xr.Dataset()
ds["temperature"] = (("x", "y", "time"), temp)
ds["temperature_double"] = (("x", "y", "time"), temp * 2)
ds["precipitation"] = (("x", "y", "time"), precip)
ds.coords["lat"] = (("x", "y"), lat)
ds.coords["lon"] = (("x", "y"), lon)
ds.coords["time"] = pd.date_range("2014-09-06", periods=3)
ds.coords["reference_time"] = pd.Timestamp("2014-09-05")
print(ds)
# assign() does not modify ds in place; it returns a new Dataset. The result
# must be captured, otherwise the new variable is silently thrown away.
ds_assigned = ds.assign(temperature2=2 * ds.temperature)
print(ds)  # the original Dataset is unchanged
print(ds_assigned)  # the returned Dataset carries the extra temperature2
<xarray.Dataset>
Dimensions:             (x: 2, y: 2, time: 3)
Coordinates:
    lat                 (x, y) float64 42.25 42.21 42.63 42.59
    lon                 (x, y) float64 -99.83 -99.32 -99.79 -99.23
  * time                (time) datetime64[ns] 2014-09-06 2014-09-07 2014-09-08
    reference_time      datetime64[ns] 2014-09-05
Dimensions without coordinates: x, y
Data variables:
    temperature         (x, y, time) float64 15.03 15.44 6.512 ... 22.09 3.728
    temperature_double  (x, y, time) float64 30.07 30.88 13.02 ... 44.18 7.455
    precipitation       (x, y, time) float64 1.99 7.872 2.105 ... 5.925 0.6317
<xarray.Dataset>
Dimensions:             (x: 2, y: 2, time: 3)
Coordinates:
    lat                 (x, y) float64 42.25 42.21 42.63 42.59
    lon                 (x, y) float64 -99.83 -99.32 -99.79 -99.23
  * time                (time) datetime64[ns] 2014-09-06 2014-09-07 2014-09-08
    reference_time      datetime64[ns] 2014-09-05
Dimensions without coordinates: x, y
Data variables:
    temperature         (x, y, time) float64 15.03 15.44 6.512 ... 22.09 3.728
    temperature_double  (x, y, time) float64 30.07 30.88 13.02 ... 44.18 7.455
    precipitation       (x, y, time) float64 1.99 7.872 2.105 ... 5.925 0.6317
1
2
3
4
5
# Re-declare the coordinate labels of a DataArray with assign_coords()
da = xr.DataArray(np.random.rand(4), coords=[np.array([358, 359, 0, 1])], dims="lon")
print(da)
# Wrap longitudes from the 0..360 convention into -180..180. assign_coords()
# returns a new object instead of editing da, so the result has to be kept.
da_wrapped = da.assign_coords(lon=(((da.lon + 180) % 360) - 180))
print(da)  # the original labels (358 359 0 1) are untouched
print(da_wrapped)  # lon is now -2 -1 0 1
<xarray.DataArray (lon: 4)>
array([0.18946962, 0.0144292 , 0.47497827, 0.26471126])
Coordinates:
  * lon      (lon) int64 358 359 0 1
<xarray.DataArray (lon: 4)>
array([0.18946962, 0.0144292 , 0.47497827, 0.26471126])
Coordinates:
  * lon      (lon) int64 358 359 0 1
1
2
# Rename variables with rename(). It returns a new Dataset (ds keeps its
# original variable names), so print the result explicitly: a bare expression
# is only displayed inside a notebook cell.
print(ds.rename({"temperature": "temp", "precipitation": "precip"}))
<xarray.Dataset>
Dimensions:             (x: 2, y: 2, time: 3)
Coordinates:
    lat                 (x, y) float64 42.25 42.21 42.63 42.59
    lon                 (x, y) float64 -99.83 -99.32 -99.79 -99.23
  * time                (time) datetime64[ns] 2014-09-06 2014-09-07 2014-09-08
    reference_time      datetime64[ns] 2014-09-05
Dimensions without coordinates: x, y
Data variables:
    temp                (x, y, time) float64 15.03 15.44 6.512 ... 22.09 3.728
    temperature_double  (x, y, time) float64 30.07 30.88 13.02 ... 44.18 7.455
    precip              (x, y, time) float64 1.99 7.872 2.105 ... 5.925 0.6317

5.1.5 数据数组与数据集的处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Create a small labelled DataArray to demonstrate the indexing styles
da = xr.DataArray(
    np.random.rand(4, 3),
    [
        ("time", pd.date_range("2000-01-01", periods=4)),
        ("space", ["level", "lat", "lon"]),
    ],
)
print(da)

# Positional (integer) indexing, NumPy style
print(da[0, 0])
print(da[:, [2, 1]])

# Label-based indexing through .loc
print(da.loc["2000-01-01":"2000-01-02", "lat"])

# Label-based indexing can also be the target of an assignment
da.loc["2000-01-01", ["lat", "lon"]] = -10
print(da)

# Reusable selectors for the remaining examples
first_two = slice(None, 2)
first_two_days = slice("2000-01-01", "2000-01-02")

# Positional indexing keyed by dimension name
print(da[{"space": 0, "time": first_two}])

# Label indexing keyed by dimension name
print(da.loc[{"time": first_two_days}])

# isel()/sel() are the method forms of the two lookups above
print(da.isel(space=0, time=first_two))
print(da.sel(time=first_two_days))
<xarray.DataArray (time: 4, space: 3)>
array([[0.83459434, 0.62437497, 0.82596113],
       [0.34848421, 0.76559789, 0.28536279],
       [0.90091043, 0.97759082, 0.76602217],
       [0.26035703, 0.01217118, 0.48790219]])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
  * space    (space) <U5 'level' 'lat' 'lon'
<xarray.DataArray ()>
array(0.83459434)
Coordinates:
    time     datetime64[ns] 2000-01-01
    space    <U5 'level'
<xarray.DataArray (time: 4, space: 2)>
array([[0.82596113, 0.62437497],
       [0.28536279, 0.76559789],
       [0.76602217, 0.97759082],
       [0.48790219, 0.01217118]])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
  * space    (space) <U5 'lon' 'lat'
<xarray.DataArray (time: 2)>
array([0.62437497, 0.76559789])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02
    space    <U5 'lat'
<xarray.DataArray (time: 4, space: 3)>
array([[  0.83459434, -10.        , -10.        ],
       [  0.34848421,   0.76559789,   0.28536279],
       [  0.90091043,   0.97759082,   0.76602217],
       [  0.26035703,   0.01217118,   0.48790219]])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
  * space    (space) <U5 'level' 'lat' 'lon'
<xarray.DataArray (time: 2)>
array([0.83459434, 0.34848421])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02
    space    <U5 'level'
<xarray.DataArray (time: 2, space: 3)>
array([[  0.83459434, -10.        , -10.        ],
       [  0.34848421,   0.76559789,   0.28536279]])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02
  * space    (space) <U5 'level' 'lat' 'lon'
<xarray.DataArray (time: 2)>
array([0.83459434, 0.34848421])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02
    space    <U5 'level'
<xarray.DataArray (time: 2, space: 3)>
array([[  0.83459434, -10.        , -10.        ],
       [  0.34848421,   0.76559789,   0.28536279]])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02
  * space    (space) <U5 'level' 'lat' 'lon'
1
2
3
4
5
6
# Inexact ("nearest neighbour") label lookup with sel(method=...)
da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])])
print(da)
# "nearest": pick the closest existing label (1.1 -> 1, 1.9 -> 2)
print(da.sel(x=[1.1, 1.9], method="nearest"))
# "backfill": pick the next label at or after the requested value (0.1 -> 1)
print(da.sel(x=0.1, method="backfill"))
# "pad": pick the previous label at or before the requested value
print(da.sel(x=[0.5, 1, 1.5, 2, 2.5], method="pad"))
<xarray.DataArray (x: 3)>
array([1, 2, 3])
Coordinates:
  * x        (x) int64 0 1 2
<xarray.DataArray (x: 2)>
array([2, 3])
Coordinates:
  * x        (x) int64 1 2
<xarray.DataArray ()>
array(2)
Coordinates:
    x        int64 1
<xarray.DataArray (x: 5)>
array([1, 2, 2, 3, 3])
Coordinates:
  * x        (x) int64 0 1 1 2 2
1
2
3
# Use tolerance to cap the maximum distance allowed for an inexact match.
# 0.9 and 1.1 are within 0.2 of label 1; 1.5 is not, so it becomes NaN.
da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])])
print(da.reindex(x=[0.9, 1.1, 1.5], method="nearest", tolerance=0.2))
<xarray.DataArray (x: 3)>
array([ 2.,  2., nan])
Coordinates:
  * x        (x) float64 0.9 1.1 1.5
1
2
3
4
5
# Mask elements while keeping the original shape: entries where the
# condition is False are replaced with NaN.
da = xr.DataArray(np.arange(16).reshape(4, 4), dims=["x", "y"])
print(da.where(da.y < 2))
# With drop=True the fully masked part is removed instead (y shrinks to 2)
print(da.where(da.y < 2, drop=True))
<xarray.DataArray (x: 4, y: 4)>
array([[ 0.,  1., nan, nan],
       [ 4.,  5., nan, nan],
       [ 8.,  9., nan, nan],
       [12., 13., nan, nan]])
Dimensions without coordinates: x, y
<xarray.DataArray (x: 4, y: 2)>
array([[ 0.,  1.],
       [ 4.,  5.],
       [ 8.,  9.],
       [12., 13.]])
Dimensions without coordinates: x, y
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Handling missing values
x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"])
print(x)
# Boolean mask that is True where x is missing
print(x.isnull())
# Boolean mask that is True where x is not missing
print(x.notnull())
# Number of non-missing values in x
print(x.count())
# Drop the missing values along the dimension named "x"
print(x.dropna(dim="x"))
# Replace the missing values with -1
print(x.fillna(-1))
# Forward fill: replace each missing value with the last valid value before it
print(x.ffill("x"))
# Backward fill: replace each missing value with the next valid value after it
print(x.bfill("x"))
<xarray.DataArray (x: 5)>
array([ 0.,  1., nan, nan,  2.])
Dimensions without coordinates: x
<xarray.DataArray (x: 5)>
array([False, False,  True,  True, False])
Dimensions without coordinates: x
<xarray.DataArray (x: 5)>
array([ True,  True, False, False,  True])
Dimensions without coordinates: x
<xarray.DataArray ()>
array(3)
<xarray.DataArray (x: 3)>
array([0., 1., 2.])
Dimensions without coordinates: x
<xarray.DataArray (x: 5)>
array([ 0.,  1., -1., -1.,  2.])
Dimensions without coordinates: x
<xarray.DataArray (x: 5)>
array([0., 1., 1., 1., 2.])
Dimensions without coordinates: x
<xarray.DataArray (x: 5)>
array([0., 1., 2., 2., 2.])
Dimensions without coordinates: x
1
2
3
4
5
6
7
8
9
# Aggregation
# A fixed seed keeps the sample values reproducible between runs
rng = np.random.RandomState(0)
arr = xr.DataArray(
    rng.randn(2, 3),
    [("lat", [5, 10]), ("lon", [15, 20, 25])],
)
print(arr)

# Sum along the lat axis (one value per lon)
sum_over_lat = arr.sum(dim="lat")
print(sum_over_lat)

# Standard deviation over the whole array (both dimensions)
overall_std = arr.std(["lat", "lon"])
print(overall_std)

# Minimum of the whole array
overall_min = arr.min()
print(overall_min)
<xarray.DataArray (lat: 2, lon: 3)>
array([[ 1.76405235,  0.40015721,  0.97873798],
       [ 2.2408932 ,  1.86755799, -0.97727788]])
Coordinates:
  * lat      (lat) int64 5 10
  * lon      (lon) int64 15 20 25
<xarray.DataArray (lon: 3)>
array([4.00494555e+00, 2.26771520e+00, 1.46010423e-03])
Coordinates:
  * lon      (lon) int64 15 20 25
<xarray.DataArray ()>
array(1.09038344)
<xarray.DataArray ()>
array(-0.97727788)
1
2
3
# NaN values are skipped by default (mean is 2.0); pass skipna=False to
# disable that, in which case the result is NaN.
print(xr.DataArray([1, 2, np.nan, 3]).mean())
print(xr.DataArray([1, 2, np.nan, 3]).mean(skipna=False))
<xarray.DataArray ()>
array(2.)
<xarray.DataArray ()>
array(nan)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Rolling windows
values = np.arange(0, 7.5, 0.5).reshape(3, 5)
arr = xr.DataArray(values, dims=("lat", "lon"))
print(arr)

# rolling() builds a Rolling object; aggregation methods apply to it directly
r = arr.rolling(lon=3)
print(r)
print(r.mean())

# center=True centres each window on its label; adding min_periods lowers the
# number of valid values a window needs before it yields a result
for window_options in ({"center": True}, {"center": True, "min_periods": 2}):
    r = arr.rolling(lon=3, **window_options)
    print(r.mean())

# Rolling over several dimensions at once
print(arr.rolling(lat=2, lon=3, min_periods=2))
<xarray.DataArray (lat: 3, lon: 5)>
array([[0. , 0.5, 1. , 1.5, 2. ],
       [2.5, 3. , 3.5, 4. , 4.5],
       [5. , 5.5, 6. , 6.5, 7. ]])
Dimensions without coordinates: lat, lon
DataArrayRolling [lon->3]
<xarray.DataArray (lat: 3, lon: 5)>
array([[nan, nan, 0.5, 1. , 1.5],
       [nan, nan, 3. , 3.5, 4. ],
       [nan, nan, 5.5, 6. , 6.5]])
Dimensions without coordinates: lat, lon
<xarray.DataArray (lat: 3, lon: 5)>
array([[nan, 0.5, 1. , 1.5, nan],
       [nan, 3. , 3.5, 4. , nan],
       [nan, 5.5, 6. , 6.5, nan]])
Dimensions without coordinates: lat, lon
<xarray.DataArray (lat: 3, lon: 5)>
array([[0.25, 0.5 , 1.  , 1.5 , 1.75],
       [2.75, 3.  , 3.5 , 4.  , 4.25],
       [5.25, 5.5 , 6.  , 6.5 , 6.75]])
Dimensions without coordinates: lat, lon
DataArrayRolling [lat->2,lon->3]
1
2
3
4
5
6
7
8
9
10
# Split-apply-combine with groupby
ds = xr.Dataset(
    {"foo": (("x", "y"), np.random.rand(4, 3))},
    coords={
        "x": [10, 20, 30, 40],
        "letters": ("x", list("abba")),
    },
)
arr = ds["foo"]
print(ds)

# Group by the name of a variable or coordinate in the Dataset
by_letter = ds.groupby("letters")
print(by_letter)

# .groups maps each group label to the integer positions it covers
print(by_letter.groups)

# Apply a reduction to every group
print(arr.groupby("letters").mean(dim="x"))
<xarray.Dataset>
Dimensions:  (x: 4, y: 3)
Coordinates:
  * x        (x) int64 10 20 30 40
    letters  (x) <U1 'a' 'b' 'b' 'a'
Dimensions without coordinates: y
Data variables:
    foo      (x, y) float64 0.9129 0.5615 0.9985 0.788 ... 0.7002 0.2049 0.02066
DatasetGroupBy, grouped over 'letters'
2 groups with labels 'a', 'b'.
{'a': [0, 3], 'b': [1, 2]}
<xarray.DataArray 'foo' (letters: 2, y: 3)>
array([[0.80654464, 0.38319598, 0.50960111],
       [0.72969603, 0.11503978, 0.44786172]])
Coordinates:
  * letters  (letters) object 'a' 'b'
Dimensions without coordinates: y
1
2
3
4
5
6
7
8
9
10
# Reshaping and reorganising data
ds = xr.Dataset({"foo": (("x", "y", "z"), [[[42]]]), "bar": (("y", "z"), [[24]])})
# Reorder the dimensions explicitly with transpose("y", "z", "x")
print(ds.transpose("y", "z", "x"))
# With no arguments, the order of all dimensions is reversed
print(ds.transpose())
# Add a dimension and remove it again
expanded = ds.expand_dims("w")  # new length-1 dimension "w" in front
print(expanded)
print(expanded.squeeze("w"))  # drop the length-1 dimension again
<xarray.Dataset>
Dimensions:  (x: 1, y: 1, z: 1)
Dimensions without coordinates: x, y, z
Data variables:
    foo      (y, z, x) int64 42
    bar      (y, z) int64 24
<xarray.Dataset>
Dimensions:  (x: 1, y: 1, z: 1)
Dimensions without coordinates: x, y, z
Data variables:
    foo      (z, y, x) int64 42
    bar      (z, y) int64 24
<xarray.Dataset>
Dimensions:  (x: 1, y: 1, z: 1, w: 1)
Dimensions without coordinates: x, y, z, w
Data variables:
    foo      (w, x, y, z) int64 42
    bar      (w, y, z) int64 24
<xarray.Dataset>
Dimensions:  (x: 1, y: 1, z: 1)
Dimensions without coordinates: x, y, z
Data variables:
    foo      (x, y, z) int64 42
    bar      (y, z) int64 24
1
2
3
4
5
6
# Converting between Dataset and DataArray
ds = xr.Dataset(
    {
        "foo": (("x", "y", "z"), [[[42]]]),
        "bar": (("y", "z"), [[24]]),
    }
)

# to_array() stacks the data variables along a new "variable" dimension
arr = ds.to_array()
print(arr)

# to_dataset(dim=...) splits that dimension back into separate variables;
# "bar" now carries the broadcast (x, y, z) dimensions
print(arr.to_dataset(dim="variable"))
<xarray.DataArray (variable: 2, x: 1, y: 1, z: 1)>
array([[[[42]]],


       [[[24]]]])
Coordinates:
  * variable  (variable) <U3 'foo' 'bar'
Dimensions without coordinates: x, y, z
<xarray.Dataset>
Dimensions:  (x: 1, y: 1, z: 1)
Dimensions without coordinates: x, y, z
Data variables:
    foo      (x, y, z) int64 42
    bar      (x, y, z) int64 24
1
2
3
4
5
6
7
8
# Stacking and unstacking dimensions
array = xr.DataArray(np.random.randn(2, 3), coords=[("x", ["a", "b"]), ("y", [0, 1, 2])])
print(array)
# Stack the x and y dimensions into one new dimension "z" (a MultiIndex)
stacked = array.stack(z=("x", "y"))
print(stacked)
# unstack() splits the stacked dimension back into x and y
print(stacked.unstack("z"))
<xarray.DataArray (x: 2, y: 3)>
array([[ 0.23731548,  1.39402883,  0.74502096],
       [-1.68168184,  0.08053787,  0.00270266]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 0 1 2
<xarray.DataArray (z: 6)>
array([ 0.23731548,  1.39402883,  0.74502096, -1.68168184,  0.08053787,
        0.00270266])
Coordinates:
  * z        (z) MultiIndex
  - x        (z) object 'a' 'a' 'a' 'b' 'b' 'b'
  - y        (z) int64 0 1 2 0 1 2
<xarray.DataArray (x: 2, y: 3)>
array([[ 0.23731548,  1.39402883,  0.74502096],
       [-1.68168184,  0.08053787,  0.00270266]])
Coordinates:
  * x        (x) object 'a' 'b'
  * y        (y) int64 0 1 2
1
2
3
4
5
6
7
# Shifting and rolling data
array = xr.DataArray([1, 2, 3, 4], dims="x")
print(array)
# Shift the data two positions to the right; vacated slots become NaN
print(array.shift(x=2))
# Roll the data two positions to the right; values wrap around to the front
print(array.roll(x=2, roll_coords=True))
<xarray.DataArray (x: 4)>
array([1, 2, 3, 4])
Dimensions without coordinates: x
<xarray.DataArray (x: 4)>
array([nan, nan,  1.,  2.])
Dimensions without coordinates: x
<xarray.DataArray (x: 4)>
array([3, 4, 1, 2])
Dimensions without coordinates: x
1
2
3
4
5
6
7
8
9
10
11
12
# Combining data
arr = xr.DataArray(
    np.random.randn(2, 3),
    [("x", ["a", "b"]), ("y", [10, 20, 30])],
)
print(arr)

# Concatenate two sub-arrays of arr along y. The slices overlap (2: and 1:),
# so the y label 30 appears twice in the result.
pieces = [arr[:, 2:], arr[:, 1:]]
print(xr.concat(pieces, dim="y"))

# Merge differently named variables into one Dataset
named_scalars = [xr.DataArray(n, name="var%d" % n) for n in range(5)]
print(xr.merge(named_scalars))

# Combine objects with different indexes; gaps are filled from the other
# object, and the calling object wins where both have a value
ar0 = xr.DataArray(
    [[0, 0], [0, 0]],
    [("x", ["a", "b"]), ("y", [-1, 0])],
)
ar1 = xr.DataArray(
    [[1, 1], [1, 1]],
    [("x", ["b", "c"]), ("y", [0, 1])],
)
print(ar0.combine_first(ar1))
print(ar1.combine_first(ar0))
<xarray.DataArray (x: 2, y: 3)>
array([[-1.98082369, -0.93221737, -1.07522404],
       [-0.4474485 ,  0.55808941, -0.48316307]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 10 20 30
<xarray.DataArray (x: 2, y: 3)>
array([[-1.07522404, -0.93221737, -1.07522404],
       [-0.48316307,  0.55808941, -0.48316307]])
Coordinates:
  * x        (x) <U1 'a' 'b'
  * y        (y) int64 30 20 30
<xarray.Dataset>
Dimensions:  ()
Data variables:
    var0     int64 0
    var1     int64 1
    var2     int64 2
    var3     int64 3
    var4     int64 4
<xarray.DataArray (x: 3, y: 3)>
array([[ 0.,  0., nan],
       [ 0.,  0.,  1.],
       [nan,  1.,  1.]])
Coordinates:
  * x        (x) <U1 'a' 'b' 'c'
  * y        (y) int64 -1 0 1
<xarray.DataArray (x: 3, y: 3)>
array([[ 0.,  0., nan],
       [ 0.,  1.,  1.],
       [nan,  1.,  1.]])
Coordinates:
  * x        (x) <U1 'a' 'b' 'c'
  * y        (y) int64 -1 0 1
1
2
3
4
5
6
7
8
# Combine Datasets/DataArrays along several dimensions at once
arr = xr.DataArray(
    name="temperature",
    data=[[1, 2], [3, 4]],
    dims=["x", "y"],
)
# 2x2 nested list of tiles: outer level maps to x, inner level to y
ds_grid = [[arr, arr] for _ in range(2)]
print(xr.combine_nested(ds_grid, concat_dim=["x", "y"]))

# Let the ordering be inferred from the coordinates of the inputs
x1 = xr.DataArray(
    name="foo",
    data=[1, 3, 5],
    coords=[("x", [1, 3, 5])],
)
x2 = xr.DataArray(
    name="foo",
    data=[2, 4, 6],
    coords=[("x", [2, 4, 6])],
)
print(xr.combine_by_coords([x2, x1]))
<xarray.DataArray 'temperature' (x: 4, y: 4)>
array([[1, 2, 1, 2],
       [3, 4, 3, 4],
       [1, 2, 1, 2],
       [3, 4, 3, 4]])
Dimensions without coordinates: x, y
<xarray.Dataset>
Dimensions:  (x: 6)
Coordinates:
  * x        (x) int64 1 2 3 4 5 6
Data variables:
    foo      (x) float64 1.0 2.0 3.0 4.0 5.0 6.0


/opt/conda/lib/python3.7/site-packages/xarray/core/dataarray.py:789: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  return key in self.data
/opt/conda/lib/python3.7/site-packages/xarray/core/dataarray.py:789: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  return key in self.data
1
2
3
4
5
# Time-series data
# Parse date strings into a DatetimeIndex
print(pd.to_datetime(["2000-01-01", "2000-02-02"]))
# 365 consecutive daily timestamps starting at 2000-01-01
print(pd.date_range("2000-01-01", periods=365))
import datetime
# A standard-library datetime is stored as datetime64[ns] (see printed repr)
print(xr.Dataset({"time": datetime.datetime(2000, 1, 1)}))
DatetimeIndex(['2000-01-01', '2000-02-02'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08',
               '2000-01-09', '2000-01-10',
               ...
               '2000-12-21', '2000-12-22', '2000-12-23', '2000-12-24',
               '2000-12-25', '2000-12-26', '2000-12-27', '2000-12-28',
               '2000-12-29', '2000-12-30'],
              dtype='datetime64[ns]', length=365, freq='D')
<xarray.Dataset>
Dimensions:  ()
Data variables:
    time     datetime64[ns] 2000-01-01
1
2
3
4
5
6
# Indexing with datetimes.
# Relies on `import datetime` from the previous example.
# NOTE(review): recent pandas releases deprecate the "H" frequency alias in
# favour of "h" -- confirm against the pandas version this material targets.
time = pd.date_range("2000-01-01", freq="H", periods=365 * 24)
ds = xr.Dataset({"foo": ("time", np.arange(365 * 24)), "time": time})
# A partial date string selects the whole matching period (all of January)
print(ds.sel(time="2000-01"))
# A slice of date strings includes both end dates (10 full days here)
print(ds.sel(time=slice("2000-06-01", "2000-06-10")))
# A datetime.time selects every timestamp at that time of day (12:00)
print(ds.sel(time=datetime.time(12)))
<xarray.Dataset>
Dimensions:  (time: 744)
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 ... 2000-01-31T23:00:00
Data variables:
    foo      (time) int64 0 1 2 3 4 5 6 7 8 ... 736 737 738 739 740 741 742 743
<xarray.Dataset>
Dimensions:  (time: 240)
Coordinates:
  * time     (time) datetime64[ns] 2000-06-01 ... 2000-06-10T23:00:00
Data variables:
    foo      (time) int64 3648 3649 3650 3651 3652 ... 3883 3884 3885 3886 3887
<xarray.Dataset>
Dimensions:  (time: 365)
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01T12:00:00 ... 2000-12-30T12:00:00
Data variables:
    foo      (time) int64 12 36 60 84 108 132 ... 8628 8652 8676 8700 8724 8748

5.2 MetPy入门

5.2.2 MetPy的单位制

1
2
3
4
5
6
7
8
# Unit-aware arithmetic with MetPy
import numpy as np
from metpy.units import units
# Attach a unit by multiplying the values with it ...
distance = np.arange(1, 5) * units.meters
# ... or build a Quantity from the values and a unit string
time = units.Quantity(np.arange(1, 5), 'sec')
# Units combine through arithmetic: meter / second
print(distance / time)
# Mixed units can be added; the printed result is expressed in inches
print(3 * units.inch + 5 * units.cm)
# Explicit conversion to another unit
print((1 * units.inch).to(units.mm))
[1.0 1.0 1.0 1.0] meter / second
4.968503937007874 inch
25.4 millimeter
1
2
3
# Convert to SI base units: J/kg reduces to meter ** 2 / second ** 2
Lf = 3.34e6 * units('J/kg')
print(Lf, Lf.to_base_units(), sep='\n')
3340000.0 joule / kilogram
3340000.0 meter ** 2 / second ** 2
1
2
3
4
5
# Offset units (temperatures)
Lf = 3.34e6 * units('J/kg')
print(Lf, Lf.to_base_units(), sep='\n')
# A temperature difference uses delta_degC when added to a degC temperature
print(25 * units.degC + 5 * units.delta_degC)
# Kelvin quantities are added directly
print(273 * units.kelvin + 10 * units.kelvin)
3340000.0 joule / kilogram
3340000.0 meter ** 2 / second ** 2
30 degree_Celsius
283 kelvin
1
2
3
4
5
6
7
8
# Call MetPy calculation functions with unit-carrying variables
import numpy as np
import metpy.calc as mpcalc
from metpy.units import units
# Pass a unit string to units(...); per the original note this is the only
# form that works when the unit string contains spaces or symbols
temperature = 73.2 * units("degF")
# Attribute access (units.xxx) is the other way to attach a unit
rh = 64 * units.percent
dewpoint = mpcalc.dewpoint_from_relative_humidity(temperature, rh)
# The input is in degF but the printed dewpoint is in degree_Celsius
print(dewpoint)
15.726236381245258 degree_Celsius
1
2
3
# Using the physical constants shipped with MetPy.
# Relies on `units` imported in an earlier example.
import metpy.constants as constants
# constants.g carries units itself, so the product prints as m**2 / s**2
print(5*units.m * constants.g)
49.033249999999995 meter ** 2 / second ** 2