import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Six-element Series built from a plain list; the NaN entry makes it float64.
raw_values = [1, 3, 5, np.nan, 6, 8]
s = pd.Series(data=raw_values)

# Bare expression: displays the Series when run as a notebook cell.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

# Daily DatetimeIndex covering 2022-01-01 through 2022-01-08 (both ends included).
dates = pd.date_range(start='20220101', end='20220108')
dates

DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05', '2022-01-06', '2022-01-07', '2022-01-08'],
              dtype='datetime64[ns]', freq='D')

# One row per entry of `dates`, four columns (A-D) of standard-normal noise.
# The generator is not seeded, so the values differ on every run.
noise = np.random.randn(len(dates), 4)
df = pd.DataFrame(data=noise, index=dates, columns=list("ABCD"))
df

# Frame with one column per dtype.  Scalar entries (A, B, F) are broadcast to
# the length of the array-like columns, giving four rows.
df2_columns = {
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.full(4, 3, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
}
df2 = pd.DataFrame(df2_columns)
df2

# Per-column dtypes of the frame built above.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

# First row only.
df.head(n=1)

# Last two rows.
df.tail(n=2)

# Row labels of the frame.
df.index

DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05', '2022-01-06', '2022-01-07', '2022-01-08'],
              dtype='datetime64[ns]', freq='D')

# Column labels of the frame.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

# df.axes[0] is the row index, so this prints the second row label.
print(df.index[1])

2022-01-02 00:00:00

# Print the frame's text representation.
print(df)
# Draw one histogram per column; the call returns the grid of matplotlib axes.
df.hist()

                   A         B         C         D
2022-01-01 -0.631687  1.188095  1.338382 -0.551332
2022-01-02  1.409102 -1.883717 -0.228207  0.536107
2022-01-03  1.067096  0.835134  1.201175 -1.649642
2022-01-04  0.968663 -0.581464  0.295065 -1.619879
2022-01-05  0.239929 -2.513251  1.112255 -0.643571
2022-01-06 -2.429498 -1.451654 -0.639179 -2.947974
2022-01-07 -0.461846  0.771496 -0.100905 -1.080324
2022-01-08  0.271669 -0.059790 -0.029706 -2.394378

array([[<AxesSubplot:title={'center':'A'}>,
        <AxesSubplot:title={'center':'B'}>],
       [<AxesSubplot:title={'center':'C'}>,
        <AxesSubplot:title={'center':'D'}>]], dtype=object)

# The frame's data as a NumPy array.  to_numpy() is the accessor the pandas
# documentation recommends in place of the older .values attribute.
df.to_numpy()

array([[-0.63168745,  1.1880953 ,  1.33838185, -0.55133226],
       [ 1.40910215, -1.88371741, -0.22820721,  0.53610691],
       [ 1.06709582,  0.8351338 ,  1.20117467, -1.64964158],
       [ 0.96866279, -0.58146391,  0.29506536, -1.6198791 ],
       [ 0.23992856, -2.51325115,  1.1122553 , -0.64357132],
       [-2.42949833, -1.45165431, -0.63917879, -2.94797382],
       [-0.46184615,  0.77149624, -0.10090492, -1.08032395],
       [ 0.2716691 , -0.05979006, -0.0297059 , -2.39437784]])

# Summary statistics for each column.
df.describe()

# Transposed frame: the dates become the columns.
df.transpose()

# Columns ordered by label, descending.
df.sort_index(axis="columns", ascending=False)

# Rows ordered by the values in column A, largest first.
df.sort_values("A", ascending=False)

# Label-based row slice on the DatetimeIndex; both end points are included.
df.loc['20220104':'20220106']

# Every row, columns A and D only.
df[['A', 'D']]

# Row slice and column subset in a single label-based lookup.
df.loc['20220104':'20220106', ['A', 'B']]

# Single scalar: column A on the second date.
df.at[dates[1], 'A']

1.4091021538454698

# Boolean mask: keep only the rows where column A is positive.
df[df['A'] > 0]

# Work on a copy so the extra label column is not added to df.
df2 = df.copy()
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three', 'four', 'three']
df2

# Rows whose E label is either 'two' or 'four'.
in_two_or_four = df2['E'].isin(['two', 'four'])
df2[in_two_or_four]

# Overwrite three cells of the first row with 0, re-displaying df after each.

# Label-based: column A on the first date.
first_day = dates[0]
df.loc[first_day, 'A'] = 0
df

# Position-based: row 0, column 1.
df.iloc[0, 1] = 0
df

# Position-based: row 0, column 3.
df.iloc[0, 3] = 0
df

# Values 1..6 labelled with six consecutive days starting 2022-01-02.
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20220102', periods=6))

# Column assignment aligns s1 on df's index, so dates that s1 does not cover
# (the first and last row of df) receive NaN in the new column.
df['F'] = s1
df

# Inspect the first row's last column (F).  to_numpy() is the accessor the
# pandas documentation recommends over .values; convert once and reuse it.
cells = df.to_numpy()
print(type(cells[0, -1]), cells[0, -1])

<class 'numpy.float64'> nan

# Independent copy for the missing-data examples.  None of the calls below is
# assigned back, so df1 itself stays as copied.
df1 = df.copy()

# Drop every row that contains at least one NaN.
df1.dropna(axis=0, how='any')

# Replace each NaN with 5.
df1.fillna(5)

# Boolean frame marking the NaN cells.
df1.isna()

# Series on the first six dates, moved down two positions: the first two slots
# become NaN and the last two original values fall off the end.
unshifted = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates[:6])
s = unshifted.shift(periods=2)
s

2022-01-01    NaN
2022-01-02    NaN
2022-01-03    1.0
2022-01-04    3.0
2022-01-05    5.0
2022-01-06    NaN
Freq: D, dtype: float64

# Display the shifted Series again.
s

2022-01-01    NaN
2022-01-02    NaN
2022-01-03    1.0
2022-01-04    3.0
2022-01-05    5.0
2022-01-06    NaN
Freq: D, dtype: float64

def _spread(values):
    """Return the difference between the largest and smallest value."""
    return values.max() - values.min()


# Add s to every column of df, matching s against df's row labels.
df.add(s, axis='index')

# Spread (max - min) of each column.
df.apply(_spread)

A    3.838600
B    3.348385
C    1.977561
D    3.484081
F    5.000000
dtype: float64

# Spread (max - min) across the columns of each row.
df.apply(lambda row: row.max() - row.min(), axis=1)

2022-01-01    1.338382
2022-01-02    3.292820
2022-01-03    3.649642
2022-01-04    4.619879
2022-01-05    6.513251
2022-01-06    7.947974
2022-01-07    7.080324
2022-01-08    2.666047
Freq: D, dtype: float64

# Ten random integers drawn from 0..6 (the upper bound 7 is exclusive); unseeded.
draws = np.random.randint(low=0, high=7, size=10)
s = pd.Series(draws)
s

0    5
1    0
2    3
3    1
4    0
5    3
6    3
7    0
8    3
9    5
dtype: int32

# Occurrences of each distinct value, least frequent first.
s.value_counts(ascending=True)

1    1
5    2
0    3
3    4
dtype: int64

# Toy frame for the group-by examples: two string key columns (A, B) and two
# columns of unseeded standard-normal noise (C, D).
df = pd.DataFrame({
    'A': ['foo', 'bar', 'foo', 'bar',
          'foo', 'bar', 'foo', 'foo'],
    'B': ['one', 'one', 'two', 'three',
          'two', 'two', 'one', 'three'],
    'C': np.random.randn(8),
    'D': np.random.randn(8),
})
df

# Sum C and D for each value of A.  numeric_only=True keeps the string column B
# out of the aggregation: pandas >= 2.0 no longer drops it automatically and
# would concatenate B's strings into the result instead.
df.groupby("A").sum(numeric_only=True)

# With both A and B as keys, only the numeric columns remain to be summed.
df.groupby(['A', 'B']).sum()

# CSV round trip.  to_csv writes the row index as the first, unnamed column, so
# the frame read back carries it as a regular column called "Unnamed: 0".
df.to_csv('foo.csv')

pd.read_csv('foo.csv')

# HDF5 round trip.  `key` is passed by keyword because pandas 2.x deprecates
# passing anything but the path positionally to to_hdf.
df.to_hdf('foo.h5', key='df')

pd.read_hdf('foo.h5', key='df')

# Load the first sheet (sheet_name=0) of a workbook using the openpyxl engine.
# NOTE(review): the path is an absolute location on one Windows desktop, so this
# only works on a machine where that file exists.
excel_path = r"C:\Users\Administrator\Desktop\雪球内容爬虫需求2022.1.10.xlsx"
pd.read_excel(excel_path, sheet_name=0, engine="openpyxl")

	A	B	C	D
count	8.000000	8.000000	8.000000	8.000000
mean	0.054178	-0.461894	0.368610	-1.293874
std	1.235502	1.378988	0.750521	1.105099
min	-2.429498	-2.513251	-0.639179	-2.947974
25%	-0.504306	-1.559670	-0.132730	-1.835826
50%	0.255799	-0.320627	0.132680	-1.350102
75%	0.993271	0.787406	1.134485	-0.620512
max	1.409102	1.188095	1.338382	0.536107

	C	D
A
bar	0.404722	0.857120
foo	-0.315457	1.005343

		C	D
A	B
bar	one	1.733757	-0.444961
	three	0.222557	1.324174
	two	-1.551593	-0.022093
foo	one	-0.076205	-0.837857
	three	-0.645545	0.798104
	two	0.406294	1.045096

	Unnamed: 0	A	B	C	D
0	0	foo	one	-0.545937	-0.334651
1	1	bar	one	1.733757	-0.444961
2	2	foo	two	-0.006299	0.113370
3	3	bar	three	0.222557	1.324174
4	4	foo	two	0.412593	0.931726
5	5	bar	two	-1.551593	-0.022093
6	6	foo	one	0.469732	-0.503207
7	7	foo	three	-0.645545	0.798104

	统计时间	股吧内码	基金代码	实时排名	股票中文名
0	2022-01-04 13:10:00	2432	SZ002432	1	九安医疗
1	2022-01-04 13:10:00	2256	SZ002256	2	兆新股份
2	2022-01-04 13:10:00	2665	SZ002665	3	首航高科
3	2022-01-04 13:10:00	2354	SZ002354	4	天神娱乐
4	2022-01-04 13:10:00	2349	SZ002349	5	精华制药
...	...	...	...	...	...
495	2022-01-04 13:10:00	600654	SH600654	496	ST中安
496	2022-01-04 13:10:00	2082	SZ002082	497	万邦德
497	2022-01-04 13:10:00	300025	SZ300025	498	华星创业
498	2022-01-04 13:10:00	2017	SZ002017	499	东信和平
499	2022-01-04 13:10:00	300267	SZ300267	500	尔康制药

	A	B	C	D
2022-01-01	-0.631687	1.188095	1.338382	-0.551332
2022-01-02	1.409102	-1.883717	-0.228207	0.536107
2022-01-03	1.067096	0.835134	1.201175	-1.649642
2022-01-04	0.968663	-0.581464	0.295065	-1.619879
2022-01-05	0.239929	-2.513251	1.112255	-0.643571
2022-01-06	-2.429498	-1.451654	-0.639179	-2.947974
2022-01-07	-0.461846	0.771496	-0.100905	-1.080324
2022-01-08	0.271669	-0.059790	-0.029706	-2.394378

	A	B	C	D	E	F
0	1.0	2013-01-02	1.0	3	test	foo
1	1.0	2013-01-02	1.0	3	train	foo
2	1.0	2013-01-02	1.0	3	test	foo
3	1.0	2013-01-02	1.0	3	train	foo

	A	B	C	D
2022-01-01	0.000000	1.188095	1.338382	-0.551332
2022-01-02	1.409102	-1.883717	-0.228207	0.536107
2022-01-03	1.067096	0.835134	1.201175	-1.649642
2022-01-04	0.968663	-0.581464	0.295065	-1.619879
2022-01-05	0.239929	-2.513251	1.112255	-0.643571
2022-01-06	-2.429498	-1.451654	-0.639179	-2.947974
2022-01-07	-0.461846	0.771496	-0.100905	-1.080324
2022-01-08	0.271669	-0.059790	-0.029706	-2.394378

	A	B	C	D	F
2022-01-01	False	False	False	False	True
2022-01-02	False	False	False	False	False
2022-01-03	False	False	False	False	False
2022-01-04	False	False	False	False	False
2022-01-05	False	False	False	False	False
2022-01-06	False	False	False	False	False
2022-01-07	False	False	False	False	False
2022-01-08	False	False	False	False	True

	A	B	C	D
0	foo	one	-0.542044	-0.780312
1	bar	one	-1.685851	1.667171
2	foo	two	-2.512135	-1.103894
3	bar	three	-0.250959	-0.420705
4	foo	two	-3.222366	1.776841
5	bar	two	-1.228211	-0.396067
6	foo	one	2.056563	-0.819939
7	foo	three	0.819403	0.671113