PANDAS Archives - 11 중 10 번째 페이지

[PYTHON/PANDAS] DataFrame 클래스 : loc 속성을 사용해 특정 조건을 만족하는 특정 컬럼 Series 객체 구하기

■ DataFrame 클래스의 loc 속성을 사용해 특정 조건을 만족하는 특정 컬럼 Series 객체를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

series = dataFrame.loc[dataFrame["Age"] > 35, "Name"]

print(series)

"""
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
6                                McCarthy, Mr. Timothy J
11                               Bonnell, Miss Elizabeth
13                           Andersson, Mr. Anders Johan
15                      Hewlett, Mrs. (Mary D Kingcome)
                             ...
865                             Bystrom, Mrs. (Karolina)
871     Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
873                          Vander Cruyssen, Mr. Victor
879        Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
885                 Rice, Mrs. William (Margaret Norton)
Name: Name, Length: 217, dtype: object
"""

import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

series = dataFrame.loc[dataFrame["Age"] > 35, "Name"]

print(series)

"""

1 Cumings, Mrs. John Bradley (Florence Briggs Th...

6 McCarthy, Mr. Timothy J

11 Bonnell, Miss Elizabeth

13 Andersson, Mr. Anders Johan

15 Hewlett, Mrs. (Mary D Kingcome)

...

865 Bystrom, Mrs. (Karolina)

871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny)

873 Vander Cruyssen, Mr. Victor

879 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)

885 Rice, Mrs. William (Margaret Norton)

Name: Name, Length: 217, dtype: object

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

[PYTHON/PANDAS] DataFrame 클래스 : [] 연산자를 사용해 특정 컬럼에서 값을 갖는 DataFrame 객체 구하기

■ DataFrame 클래스에서 [] 연산자를 사용해 특정 컬럼에서 값을 갖는 DataFrame 객체를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

# Load the "passengers" sheet of the Titanic workbook.
dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

# Keep only the rows whose "Age" value is present. The boolean mask is built
# from dataFrame1, so it must also be applied to dataFrame1 (the only
# DataFrame defined in this script).
dataFrame2 = dataFrame1[dataFrame1["Age"].notna()]

print(dataFrame2)

"""
     PassengerId  Survived  Pclass                                               Name     Sex   Age  SibSp  Parch            Ticket     Fare Cabin Embarked
0              1         0       3                            Braund, Mr. Owen Harris    male  22.0      1      0         A/5 21171   7.2500   NaN        S
1              2         1       1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1      0          PC 17599  71.2833   C85        C
2              3         1       3                              Heikkinen, Miss Laina  female  26.0      0      0  STON/O2. 3101282   7.9250   NaN        S
3              4         1       1       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1      0            113803  53.1000  C123        S
4              5         0       3                           Allen, Mr. William Henry    male  35.0      0      0            373450   8.0500   NaN        S
..           ...       ...     ...                                                ...     ...   ...    ...    ...               ...      ...   ...      ...
885          886         0       3               Rice, Mrs. William (Margaret Norton)  female  39.0      0      5            382652  29.1250   NaN        Q
886          887         0       2                              Montvila, Rev. Juozas    male  27.0      0      0            211536  13.0000   NaN        S
887          888         1       1                        Graham, Miss Margaret Edith  female  19.0      0      0            112053  30.0000   B42        S
889          890         1       1                              Behr, Mr. Karl Howell    male  26.0      0      0            111369  30.0000  C148        C
890          891         0       3                                Dooley, Mr. Patrick    male  32.0      0      0            370376   7.7500   NaN        Q

[714 rows x 12 columns]
"""

import pandas as pd

# Load the "passengers" sheet of the Titanic workbook.
dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

# Keep only the rows whose "Age" value is present. The boolean mask is built
# from dataFrame1, so it must also be applied to dataFrame1 (the only
# DataFrame defined in this script).
dataFrame2 = dataFrame1[dataFrame1["Age"].notna()]

print(dataFrame2)

"""

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked

0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S

1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C

2 3 1 3 Heikkinen, Miss Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S

3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S

4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S

.. ... ... ... ... ... ... ... ... ... ... ... ...

885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.0 0 5 382652 29.1250 NaN Q

886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S

887 888 1 1 Graham, Miss Margaret Edith female 19.0 0 0 112053 30.0000 B42 S

889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C

890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

[714 rows x 12 columns]

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install "pandas[excel]" 명령을 실행했다.

[PYTHON/PANDAS] DataFrame 클래스 : [] 연산자를 사용해 특정 컬럼에서 값을 갖는 DataFrame 객체 구하기

■ DataFrame 클래스에서 [] 연산자를 사용해 특정 컬럼에서 값을 갖는 DataFrame 객체를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

# Load the "passengers" sheet of the Titanic workbook.
dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

# Keep only the rows whose "Age" value is present. The boolean mask is built
# from dataFrame1, so it must also be applied to dataFrame1 (the only
# DataFrame defined in this script).
dataFrame2 = dataFrame1[dataFrame1["Age"].notna()]

print(dataFrame2)

"""
     PassengerId  Survived  Pclass                                               Name     Sex   Age  SibSp  Parch            Ticket     Fare Cabin Embarked
0              1         0       3                            Braund, Mr. Owen Harris    male  22.0      1      0         A/5 21171   7.2500   NaN        S
1              2         1       1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1      0          PC 17599  71.2833   C85        C
2              3         1       3                              Heikkinen, Miss Laina  female  26.0      0      0  STON/O2. 3101282   7.9250   NaN        S
3              4         1       1       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1      0            113803  53.1000  C123        S
4              5         0       3                           Allen, Mr. William Henry    male  35.0      0      0            373450   8.0500   NaN        S
..           ...       ...     ...                                                ...     ...   ...    ...    ...               ...      ...   ...      ...
885          886         0       3               Rice, Mrs. William (Margaret Norton)  female  39.0      0      5            382652  29.1250   NaN        Q
886          887         0       2                              Montvila, Rev. Juozas    male  27.0      0      0            211536  13.0000   NaN        S
887          888         1       1                        Graham, Miss Margaret Edith  female  19.0      0      0            112053  30.0000   B42        S
889          890         1       1                              Behr, Mr. Karl Howell    male  26.0      0      0            111369  30.0000  C148        C
890          891         0       3                                Dooley, Mr. Patrick    male  32.0      0      0            370376   7.7500   NaN        Q

[714 rows x 12 columns]
"""

import pandas as pd

# Load the "passengers" sheet of the Titanic workbook.
dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

# Keep only the rows whose "Age" value is present. The boolean mask is built
# from dataFrame1, so it must also be applied to dataFrame1 (the only
# DataFrame defined in this script).
dataFrame2 = dataFrame1[dataFrame1["Age"].notna()]

print(dataFrame2)

"""

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked

0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S

1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C

2 3 1 3 Heikkinen, Miss Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S

3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S

4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S

.. ... ... ... ... ... ... ... ... ... ... ... ...

885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.0 0 5 382652 29.1250 NaN Q

886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S

887 888 1 1 Graham, Miss Margaret Edith female 19.0 0 0 112053 30.0000 B42 S

889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C

890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

[714 rows x 12 columns]

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install "pandas[excel]" 명령을 실행했다.

[PYTHON/PANDAS] Series 클래스 : notna 메소드를 사용해 값 여부 Series 객체 구하기

■ Series 클래스의 notna 메소드를 사용해 값 여부 Series 객체를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

series1 = dataFrame["Age"]

series2 = series1.notna()

print(series2)

"""
0       True
1       True
2       True
3       True
4       True
       ...
886     True
887     True
888    False
889     True
890     True
Name: Age, Length: 891, dtype: bool
"""

import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

series1 = dataFrame["Age"]

series2 = series1.notna()

print(series2)

"""

0 True

1 True

2 True

3 True

4 True

...

886 True

887 True

888 False

889 True

890 True

Name: Age, Length: 891, dtype: bool

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install "pandas[excel]" 명령을 실행했다.

[PYTHON/PANDAS] DataFrame 클래스 : [] 연산자를 사용해 특정 컬럼에서 특정 값들을 갖는 DataFrame 객체 구하기 2

■ DataFrame 클래스에서 [] 연산자를 사용해 특정 컬럼에서 특정 값들을 갖는 DataFrame 객체를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

# Load the "passengers" sheet of the Titanic workbook.
dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

# Select the rows whose "Pclass" is 2 or 3 by OR-ing two element-wise
# comparisons. Each comparison is parenthesized because `|` binds more
# tightly than `==`.
dataFrame2 = dataFrame1[(dataFrame1["Pclass"] == 2) | (dataFrame1["Pclass"] == 3)]

print(dataFrame2)

"""
     PassengerId  Survived  Pclass                                     Name     Sex   Age  SibSp  Parch            Ticket     Fare Cabin Embarked
0              1         0       3                  Braund, Mr. Owen Harris    male  22.0      1      0         A/5 21171   7.2500   NaN        S
2              3         1       3                    Heikkinen, Miss Laina  female  26.0      0      0  STON/O2. 3101282   7.9250   NaN        S
4              5         0       3                 Allen, Mr. William Henry    male  35.0      0      0            373450   8.0500   NaN        S
5              6         0       3                         Moran, Mr. James    male   NaN      0      0            330877   8.4583   NaN        Q
7              8         0       3            Palsson, Master Gosta Leonard    male   2.0      3      1            349909  21.0750   NaN        S
..           ...       ...     ...                                      ...     ...   ...    ...    ...               ...      ...   ...      ...
884          885         0       3                   Sutehall, Mr. Henry Jr    male  25.0      0      0   SOTON/OQ 392076   7.0500   NaN        S
885          886         0       3     Rice, Mrs. William (Margaret Norton)  female  39.0      0      5            382652  29.1250   NaN        Q
886          887         0       2                    Montvila, Rev. Juozas    male  27.0      0      0            211536  13.0000   NaN        S
888          889         0       3  Johnston, Miss Catherine Helen "Carrie"  female   NaN      1      2        W./C. 6607  23.4500   NaN        S
890          891         0       3                      Dooley, Mr. Patrick    male  32.0      0      0            370376   7.7500   NaN        Q

[675 rows x 12 columns]
"""

import pandas as pd

# Load the "passengers" sheet of the Titanic workbook.
dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

# Select the rows whose "Pclass" is 2 or 3 by OR-ing two element-wise
# comparisons. Each comparison is parenthesized because `|` binds more
# tightly than `==`.
dataFrame2 = dataFrame1[(dataFrame1["Pclass"] == 2) | (dataFrame1["Pclass"] == 3)]

print(dataFrame2)

"""

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked

0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S

2 3 1 3 Heikkinen, Miss Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S

4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S

5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q

7 8 0 3 Palsson, Master Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S

.. ... ... ... ... ... ... ... ... ... ... ... ...

884 885 0 3 Sutehall, Mr. Henry Jr male 25.0 0 0 SOTON/OQ 392076 7.0500 NaN S

885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.0 0 5 382652 29.1250 NaN Q

886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S

888 889 0 3 Johnston, Miss Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 NaN S

890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

[675 rows x 12 columns]

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

[PYTHON/PANDAS] DataFrame 클래스 : [] 연산자를 사용해 특정 컬럼에서 특정 값들을 갖는 DataFrame 객체 구하기 1

■ DataFrame 클래스에서 [] 연산자를 사용해 특정 컬럼에서 특정 값들을 갖는 DataFrame 객체를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

series = dataFrame1["Pclass"]

dataFrame2 = dataFrame1[series.isin([2, 3])]

print(dataFrame2)

"""
     PassengerId  Survived  Pclass                                     Name     Sex   Age  SibSp  Parch            Ticket     Fare Cabin Embarked
0              1         0       3                  Braund, Mr. Owen Harris    male  22.0      1      0         A/5 21171   7.2500   NaN        S
2              3         1       3                    Heikkinen, Miss Laina  female  26.0      0      0  STON/O2. 3101282   7.9250   NaN        S
4              5         0       3                 Allen, Mr. William Henry    male  35.0      0      0            373450   8.0500   NaN        S
5              6         0       3                         Moran, Mr. James    male   NaN      0      0            330877   8.4583   NaN        Q
7              8         0       3            Palsson, Master Gosta Leonard    male   2.0      3      1            349909  21.0750   NaN        S
..           ...       ...     ...                                      ...     ...   ...    ...    ...               ...      ...   ...      ...
884          885         0       3                   Sutehall, Mr. Henry Jr    male  25.0      0      0   SOTON/OQ 392076   7.0500   NaN        S
885          886         0       3     Rice, Mrs. William (Margaret Norton)  female  39.0      0      5            382652  29.1250   NaN        Q
886          887         0       2                    Montvila, Rev. Juozas    male  27.0      0      0            211536  13.0000   NaN        S
888          889         0       3  Johnston, Miss Catherine Helen "Carrie"  female   NaN      1      2        W./C. 6607  23.4500   NaN        S
890          891         0       3                      Dooley, Mr. Patrick    male  32.0      0      0            370376   7.7500   NaN        Q

[675 rows x 12 columns]
"""

import pandas as pd

dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

series = dataFrame1["Pclass"]

dataFrame2 = dataFrame1[series.isin([2, 3])]

print(dataFrame2)

"""

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked

0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S

2 3 1 3 Heikkinen, Miss Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S

4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S

5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q

7 8 0 3 Palsson, Master Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S

.. ... ... ... ... ... ... ... ... ... ... ... ...

884 885 0 3 Sutehall, Mr. Henry Jr male 25.0 0 0 SOTON/OQ 392076 7.0500 NaN S

885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.0 0 5 382652 29.1250 NaN Q

886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S

888 889 0 3 Johnston, Miss Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 NaN S

890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

[675 rows x 12 columns]

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

[PYTHON/PANDAS] Series 클래스 : isin 메소드를 사용해 지정 값 리스트 포함 여부 Series 객체 구하기 1

■ Series 클래스의 isin 메소드를 사용해 지정 값 리스트에 포함 여부를 갖는 Series 객체를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

series1 = dataFrame["Pclass"]

series2 = series1.isin([2, 3])

print(series2)

"""
0       True
1      False
2       True
3      False
4       True
       ...
886     True
887    False
888     True
889    False
890     True
Name: Pclass, Length: 891, dtype: bool
"""

import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

series1 = dataFrame["Pclass"]

series2 = series1.isin([2, 3])

print(series2)

"""

0 True

1 False

2 True

3 False

4 True

...

886 True

887 False

888 True

889 False

890 True

Name: Pclass, Length: 891, dtype: bool

"""

▶ requirements.txt

[PYTHON/PANDAS] DataFrame 클래스 : 특정 조건에 해당하는 데이터를 갖는 DataFrame 객체 구하기

■ DataFrame 클래스에서 특정 조건에 해당하는 데이터를 갖는 DataFrame 객체를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

dataFrame2 = dataFrame1[dataFrame1["Age"] > 35]

print(dataFrame2)

"""
     PassengerId  Survived  Pclass                                               Name     Sex   Age  SibSp  Parch    Ticket     Fare Cabin Embarked
1              2         1       1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1      0  PC 17599  71.2833   C85        C
6              7         0       1                            McCarthy, Mr. Timothy J    male  54.0      0      0     17463  51.8625   E46        S
11            12         1       1                            Bonnell, Miss Elizabeth  female  58.0      0      0    113783  26.5500  C103        S
13            14         0       3                        Andersson, Mr. Anders Johan    male  39.0      1      5    347082  31.2750   NaN        S
15            16         1       2                   Hewlett, Mrs. (Mary D Kingcome)   female  55.0      0      0    248706  16.0000   NaN        S
..           ...       ...     ...                                                ...     ...   ...    ...    ...       ...      ...   ...      ...
865          866         1       2                           Bystrom, Mrs. (Karolina)  female  42.0      0      0    236852  13.0000   NaN        S
871          872         1       1   Beckwith, Mrs. Richard Leonard (Sallie Monypeny)  female  47.0      1      1     11751  52.5542   D35        S
873          874         0       3                        Vander Cruyssen, Mr. Victor    male  47.0      0      0    345765   9.0000   NaN        S
879          880         1       1      Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)  female  56.0      0      1     11767  83.1583   C50        C
885          886         0       3               Rice, Mrs. William (Margaret Norton)  female  39.0      0      5    382652  29.1250   NaN        Q

[217 rows x 12 columns]
"""

import pandas as pd

dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

dataFrame2 = dataFrame1[dataFrame1["Age"] > 35]

print(dataFrame2)

"""

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked

1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C

6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S

11 12 1 1 Bonnell, Miss Elizabeth female 58.0 0 0 113783 26.5500 C103 S

13 14 0 3 Andersson, Mr. Anders Johan male 39.0 1 5 347082 31.2750 NaN S

15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 0 248706 16.0000 NaN S

.. ... ... ... ... ... ... ... ... ... ... ... ...

865 866 1 2 Bystrom, Mrs. (Karolina) female 42.0 0 0 236852 13.0000 NaN S

871 872 1 1 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 1 11751 52.5542 D35 S

873 874 0 3 Vander Cruyssen, Mr. Victor male 47.0 0 0 345765 9.0000 NaN S

879 880 1 1 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 1 11767 83.1583 C50 C

885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.0 0 5 382652 29.1250 NaN Q

[217 rows x 12 columns]

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install "pandas[excel]" 명령을 실행했다.

[PYTHON/PANDAS] DataFrame 클래스 : [] 연산자를 사용해 특정 컬럼들의 데이터를 갖는 DataFrame 객체 구하기

■ DataFrame 클래스에서 [] 연산자를 사용해 특정 컬럼들의 데이터를 갖는 DataFrame 객체를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

dataFrame2 = dataFrame1[["Age", "Sex"]]

print(dataFrame2)

"""
      Age     Sex
0    22.0    male
1    38.0  female
2    26.0  female
3    35.0  female
4    35.0    male
..    ...     ...
886  27.0    male
887  19.0  female
888   NaN  female
889  26.0    male
890  32.0    male

[891 rows x 2 columns]
"""

import pandas as pd

dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

dataFrame2 = dataFrame1[["Age", "Sex"]]

print(dataFrame2)

"""

Age Sex

0 22.0 male

1 38.0 female

2 26.0 female

3 35.0 female

4 35.0 male

.. ... ...

886 27.0 male

887 19.0 female

888 NaN female

889 26.0 male

890 32.0 male

[891 rows x 2 columns]

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install "pandas[excel]" 명령을 실행했다.

[PYTHON/PANDAS] Series 클래스 : shape 속성을 사용해 데이터 차원 구하기

■ Series 클래스의 shape 속성을 사용해 데이터 차원을 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

series = dataFrame["Age"]

shapeTuple = series.shape

print(shapeTuple)

"""
(891,)
"""

import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

series = dataFrame["Age"]

shapeTuple = series.shape

print(shapeTuple)

"""

(891,)

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install "pandas[excel]" 명령을 실행했다.

[PYTHON/PANDAS] DataFrame 클래스 : shape 속성을 사용해 데이터 차원 구하기

■ DataFrame 클래스의 shape 속성을 사용해 데이터 차원을 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

shapeTuple = dataFrame.shape

print(shapeTuple)

"""
(891, 12)
"""

import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

shapeTuple = dataFrame.shape

print(shapeTuple)

"""

(891, 12)

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install "pandas[excel]" 명령을 실행했다.

[PYTHON/PANDAS] Series 클래스 : head 메소드를 사용해 선행 데이터 구하기

■ Series 클래스의 head 메소드를 사용해 선행 데이터를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

series1 = dataFrame["Age"]

series2 = series1.head()

print(series2)

"""
0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: Age, dtype: float64
"""

import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

series1 = dataFrame["Age"]

series2 = series1.head()

print(series2)

"""

0 22.0

1 38.0

2 26.0

3 35.0

4 35.0

Name: Age, dtype: float64

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install "pandas[excel]" 명령을 실행했다.

[PYTHON/PANDAS] DataFrame 클래스 : info 메소드를 사용해 데이터 정보 구하기

■ DataFrame 클래스의 info 메소드를 사용해 데이터 정보를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

# Load the "passengers" sheet of titanic.xlsx into a DataFrame.
dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

# info() writes its summary straight to standard output. The value it returns
# is None (see the trailing "None" in the recorded output), so despite its
# name dataFrame2 does not hold a DataFrame.
dataFrame2 = dataFrame1.info()

# Prints "None"; the summary itself was already emitted by info().
print(dataFrame2)

"""
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   PassengerId  891 non-null    int64
 1   Survived     891 non-null    int64
 2   Pclass       891 non-null    int64
 3   Name         891 non-null    object
 4   Sex          891 non-null    object
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64
 7   Parch        891 non-null    int64
 8   Ticket       891 non-null    object
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object
 11  Embarked     889 non-null    object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
None
"""

import pandas as pd

# Load the "passengers" sheet of titanic.xlsx into a DataFrame.
dataFrame1 = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

# info() writes its summary straight to standard output. The value it returns
# is None (see the trailing "None" in the recorded output), so despite its
# name dataFrame2 does not hold a DataFrame.
dataFrame2 = dataFrame1.info()

# Prints "None"; the summary itself was already emitted by info().
print(dataFrame2)

"""

RangeIndex: 891 entries, 0 to 890

Data columns (total 12 columns):

# Column Non-Null Count Dtype

--- ------ -------------- -----

0 PassengerId 891 non-null int64

1 Survived 891 non-null int64

2 Pclass 891 non-null int64

3 Name 891 non-null object

4 Sex 891 non-null object

5 Age 714 non-null float64

6 SibSp 891 non-null int64

7 Parch 891 non-null int64

8 Ticket 891 non-null object

9 Fare 891 non-null float64

10 Cabin 204 non-null object

11 Embarked 889 non-null object

dtypes: float64(2), int64(5), object(5)

memory usage: 83.7+ KB

None

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install "pandas[excel]" 명령을 실행했다.

[PYTHON/PANDAS] read_excel 함수 : sheet_name 인자를 사용해 엑셀 파일 데이터 로드하기

■ read_excel 함수의 sheet_name 인자를 사용해 엑셀 파일 데이터를 로드하는 방법을 보여준다. ▶ main.py


import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

print(dataFrame)

"""
     PassengerId  Survived  Pclass                                               Name     Sex   Age  SibSp  Parch            Ticket     Fare Cabin Embarked
0              1         0       3                            Braund, Mr. Owen Harris    male  22.0      1      0         A/5 21171   7.2500   NaN        S
1              2         1       1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1      0          PC 17599  71.2833   C85        C
2              3         1       3                              Heikkinen, Miss Laina  female  26.0      0      0  STON/O2. 3101282   7.9250   NaN        S
3              4         1       1       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1      0            113803  53.1000  C123        S
4              5         0       3                           Allen, Mr. William Henry    male  35.0      0      0            373450   8.0500   NaN        S
..           ...       ...     ...                                                ...     ...   ...    ...    ...               ...      ...   ...      ...
886          887         0       2                              Montvila, Rev. Juozas    male  27.0      0      0            211536  13.0000   NaN        S
887          888         1       1                        Graham, Miss Margaret Edith  female  19.0      0      0            112053  30.0000   B42        S
888          889         0       3            Johnston, Miss Catherine Helen "Carrie"  female   NaN      1      2        W./C. 6607  23.4500   NaN        S
889          890         1       1                              Behr, Mr. Karl Howell    male  26.0      0      0            111369  30.0000  C148        C
890          891         0       3                                Dooley, Mr. Patrick    male  32.0      0      0            370376   7.7500   NaN        Q

[891 rows x 12 columns]
"""

import pandas as pd

dataFrame = pd.read_excel("titanic.xlsx", sheet_name = "passengers")

print(dataFrame)

"""

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked

0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S

1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C

2 3 1 3 Heikkinen, Miss Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S

3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S

4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S

.. ... ... ... ... ... ... ... ... ... ... ... ...

886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S

887 888 1 1 Graham, Miss Margaret Edith female 19.0 0 0 112053 30.0000 B42 S

888 889 0 3 Johnston, Miss Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 NaN S

889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C

890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

[891 rows x 12 columns]

"""

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install "pandas[excel]" 명령을 실행했다.

[PYTHON/PANDAS] DataFrame 클래스 : to_excel 메소드에서 sheet_name/index 인자를 사용해 엑셀 파일 만들기

■ DataFrame 클래스의 to_excel 메소드에서 sheet_name/index 인자를 사용해 엑셀 파일을 만드는 방법을 보여준다. ▶ main.py


import pandas as pd

# Read the CSV source data.
dataFrame = pd.read_csv(filepath_or_buffer = "titanic.csv")

# Write it to an Excel workbook on a sheet named "passengers";
# index = False keeps the row index out of the output file.
dataFrame.to_excel(
    excel_writer = "titanic.xlsx",
    sheet_name   = "passengers",
    index        = False
)

import pandas as pd

dataFrame = pd.read_csv("titanic.csv")

dataFrame.to_excel("titanic.xlsx", sheet_name = "passengers", index = False)

▶ requirements.txt


defusedxml==0.7.1
et_xmlfile==2.0.0
numpy==2.1.2
odfpy==1.4.1
openpyxl==3.1.5
pandas==2.2.3
python-calamine==0.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
pyxlsb==1.0.10
six==1.16.0
tzdata==2024.2
xlrd==2.0.1
XlsxWriter==3.2.0

defusedxml==0.7.1

et_xmlfile==2.0.0

numpy==2.1.2

odfpy==1.4.1

openpyxl==3.1.5

pandas==2.2.3

python-calamine==0.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

pyxlsb==1.0.10

six==1.16.0

tzdata==2024.2

xlrd==2.0.1

XlsxWriter==3.2.0

※ pip install "pandas[excel]" 명령을 실행했다.

[PYTHON/PANDAS] DataFrame 클래스 : dtypes 속성을 사용해 컬럼 데이터 타입 Series 객체 구하기

■ DataFrame 클래스의 dtypes 속성을 사용해 컬럼 데이터 타입 Series 객체를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

# Read the CSV source data.
dataFrame = pd.read_csv(filepath_or_buffer = "titanic.csv")

# dtypes is a Series mapping each column name to its data type.
series = dataFrame.dtypes

print(series)

"""
PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object
"""

import pandas as pd

dataFrame = pd.read_csv("titanic.csv")

series = dataFrame.dtypes

print(series)

"""

PassengerId int64

Survived int64

Pclass int64

Name object

Sex object

Age float64

SibSp int64

Parch int64

Ticket object

Fare float64

Cabin object

Embarked object

dtype: object

"""

▶ requirements.txt


numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2

numpy==2.1.2

pandas==2.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

six==1.16.0

tzdata==2024.2

※ pip install pandas 명령을 실행했다.

[PYTHON/PANDAS] DataFrame 클래스 : tail 메소드를 사용해 후행 데이터 구하기

■ DataFrame 클래스의 tail 메소드를 사용해 후행 데이터를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

# Read the CSV source data.
dataFrame1 = pd.read_csv(filepath_or_buffer = "titanic.csv")

# Keep only the last 8 rows.
dataFrame2 = dataFrame1.tail(n = 8)

print(dataFrame2)

"""
     PassengerId  Survived  Pclass                                     Name     Sex   Age  SibSp  Parch            Ticket    Fare Cabin Embarked
883          884         0       2            Banfield, Mr. Frederick James    male  28.0      0      0  C.A./SOTON 34068  10.500   NaN        S
884          885         0       3                   Sutehall, Mr. Henry Jr    male  25.0      0      0   SOTON/OQ 392076   7.050   NaN        S
885          886         0       3     Rice, Mrs. William (Margaret Norton)  female  39.0      0      5            382652  29.125   NaN        Q
886          887         0       2                    Montvila, Rev. Juozas    male  27.0      0      0            211536  13.000   NaN        S
887          888         1       1              Graham, Miss Margaret Edith  female  19.0      0      0            112053  30.000   B42        S
888          889         0       3  Johnston, Miss Catherine Helen "Carrie"  female   NaN      1      2        W./C. 6607  23.450   NaN        S
889          890         1       1                    Behr, Mr. Karl Howell    male  26.0      0      0            111369  30.000  C148        C
890          891         0       3                      Dooley, Mr. Patrick    male  32.0      0      0            370376   7.750   NaN        Q
"""

import pandas as pd

dataFrame1 = pd.read_csv("titanic.csv")

dataFrame2 = dataFrame1.tail(8)

print(dataFrame2)

"""

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked

883 884 0 2 Banfield, Mr. Frederick James male 28.0 0 0 C.A./SOTON 34068 10.500 NaN S

884 885 0 3 Sutehall, Mr. Henry Jr male 25.0 0 0 SOTON/OQ 392076 7.050 NaN S

885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.0 0 5 382652 29.125 NaN Q

886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.000 NaN S

887 888 1 1 Graham, Miss Margaret Edith female 19.0 0 0 112053 30.000 B42 S

888 889 0 3 Johnston, Miss Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.450 NaN S

889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.000 C148 C

890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.750 NaN Q

"""

▶ requirements.txt


numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2

numpy==2.1.2

pandas==2.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

six==1.16.0

tzdata==2024.2

※ pip install pandas 명령을 실행했다.

[PYTHON/PANDAS] DataFrame 클래스 : head 메소드를 사용해 선행 데이터 구하기

■ DataFrame 클래스의 head 메소드를 사용해 선행 데이터를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

# Read the CSV source data.
dataFrame1 = pd.read_csv(filepath_or_buffer = "titanic.csv")

# Keep only the first 8 rows.
dataFrame2 = dataFrame1.head(n = 8)

print(dataFrame2)

"""
   PassengerId  Survived  Pclass                                               Name     Sex   Age  SibSp  Parch            Ticket     Fare Cabin Embarked
0            1         0       3                            Braund, Mr. Owen Harris    male  22.0      1      0         A/5 21171   7.2500   NaN        S
1            2         1       1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1      0          PC 17599  71.2833   C85        C
2            3         1       3                              Heikkinen, Miss Laina  female  26.0      0      0  STON/O2. 3101282   7.9250   NaN        S
3            4         1       1       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1      0            113803  53.1000  C123        S
4            5         0       3                           Allen, Mr. William Henry    male  35.0      0      0            373450   8.0500   NaN        S
5            6         0       3                                   Moran, Mr. James    male   NaN      0      0            330877   8.4583   NaN        Q
6            7         0       1                            McCarthy, Mr. Timothy J    male  54.0      0      0             17463  51.8625   E46        S
7            8         0       3                      Palsson, Master Gosta Leonard    male   2.0      3      1            349909  21.0750   NaN        S
"""

import pandas as pd

dataFrame1 = pd.read_csv("titanic.csv")

dataFrame2 = dataFrame1.head(8)

print(dataFrame2)

"""

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked

0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S

1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C

2 3 1 3 Heikkinen, Miss Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S

3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S

4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S

5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q

6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S

7 8 0 3 Palsson, Master Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S

"""

▶ requirements.txt


numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2

numpy==2.1.2

pandas==2.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

six==1.16.0

tzdata==2024.2

※ pip install pandas 명령을 실행했다.

[PYTHON/PANDAS] read_csv 함수 : CSV 파일 데이터 로드하기

■ read_csv 함수를 사용해 CSV 파일 데이터를 로드하는 방법을 보여준다. ▶ main.py


import pandas as pd

# Load the CSV file into a DataFrame.
dataFrame = pd.read_csv(filepath_or_buffer = "titanic.csv")

# Print the loaded table.
print(dataFrame)

"""
     PassengerId  Survived  Pclass                                               Name     Sex   Age  SibSp  Parch            Ticket     Fare Cabin Embarked
0              1         0       3                            Braund, Mr. Owen Harris    male  22.0      1      0         A/5 21171   7.2500   NaN        S
1              2         1       1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1      0          PC 17599  71.2833   C85        C
2              3         1       3                              Heikkinen, Miss Laina  female  26.0      0      0  STON/O2. 3101282   7.9250   NaN        S
3              4         1       1       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1      0            113803  53.1000  C123        S
4              5         0       3                           Allen, Mr. William Henry    male  35.0      0      0            373450   8.0500   NaN        S
..           ...       ...     ...                                                ...     ...   ...    ...    ...               ...      ...   ...      ...
886          887         0       2                              Montvila, Rev. Juozas    male  27.0      0      0            211536  13.0000   NaN        S
887          888         1       1                        Graham, Miss Margaret Edith  female  19.0      0      0            112053  30.0000   B42        S
888          889         0       3            Johnston, Miss Catherine Helen "Carrie"  female   NaN      1      2        W./C. 6607  23.4500   NaN        S
889          890         1       1                              Behr, Mr. Karl Howell    male  26.0      0      0            111369  30.0000  C148        C
890          891         0       3                                Dooley, Mr. Patrick    male  32.0      0      0            370376   7.7500   NaN        Q

[891 rows x 12 columns]
"""

import pandas as pd

dataFrame = pd.read_csv("titanic.csv")

print(dataFrame)

"""

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked

0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S

1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C

2 3 1 3 Heikkinen, Miss Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S

3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S

4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S

.. ... ... ... ... ... ... ... ... ... ... ... ...

886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S

887 888 1 1 Graham, Miss Margaret Edith female 19.0 0 0 112053 30.0000 B42 S

888 889 0 3 Johnston, Miss Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 NaN S

889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C

890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

[891 rows x 12 columns]

"""

▶ requirements.txt


numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2

numpy==2.1.2

pandas==2.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

six==1.16.0

tzdata==2024.2

※ pip install pandas 명령을 실행했다.

[PYTHON/PANDAS] DataFrame 클래스 : describe 메소드를 사용해 통계 값 구하기

■ DataFrame 클래스의 describe 메소드를 사용해 통계 값을 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

# Small sample frame: two text columns and one numeric column.
dataFrame1 = pd.DataFrame(
    data = dict(
        Name = [
            "Braund, Mr. Owen Harris",
            "Allen, Mr. William Henry",
            "Bonnell, Miss. Elizabeth"
        ],
        Age  = [22, 35, 58],
        Sex  = ["male", "male", "female"]
    )
)

# describe() returns summary statistics; the expected output shows that only
# the numeric "Age" column is summarized here.
dataFrame2 = dataFrame1.describe()

print(dataFrame2)

"""
             Age
count   3.000000
mean   38.333333
std    18.230012
min    22.000000
25%    28.500000
50%    35.000000
75%    46.500000
max    58.000000
"""

import pandas as pd

# Small sample frame: two text columns and one numeric column.
# (The closing "]," of the "Name" list was missing in this copy, which made
# the dictionary literal a syntax error.)
dataFrame1 = pd.DataFrame(
    {
        "Name" : [
            "Braund, Mr. Owen Harris",
            "Allen, Mr. William Henry",
            "Bonnell, Miss. Elizabeth"
        ],
        "Age" : [22, 35, 58],
        "Sex" : ["male", "male", "female"],
    }
)

# describe() returns summary statistics; the expected output shows that only
# the numeric "Age" column is summarized here.
dataFrame2 = dataFrame1.describe()

print(dataFrame2)

"""

Age

count 3.000000

mean 38.333333

std 18.230012

min 22.000000

25% 28.500000

50% 35.000000

75% 46.500000

max 58.000000

"""

▶ requirements.txt


numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2

numpy==2.1.2

pandas==2.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

six==1.16.0

tzdata==2024.2

※ pip install pandas 명령을 실행했다.

[PYTHON/PANDAS] Series 클래스 : describe 메소드를 사용해 통계 값 구하기

■ Series 클래스의 describe 메소드를 사용해 통계 값을 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

# Three sample ages in a Series labelled "Age".
series1 = pd.Series(data = [22, 35, 58], name = "Age")

# describe() returns the summary statistics as another Series.
series2 = series1.describe()

print(series2)

"""
count     3.000000
mean     38.333333
std      18.230012
min      22.000000
25%      28.500000
50%      35.000000
75%      46.500000
max      58.000000
Name: Age, dtype: float64
"""

import pandas as pd

series1 = pd.Series([22, 35, 58], name = "Age")

series2 = series1.describe()

print(series2)

"""

count 3.000000

mean 38.333333

std 18.230012

min 22.000000

25% 28.500000

50% 35.000000

75% 46.500000

max 58.000000

Name: Age, dtype: float64

"""

▶ requirements.txt


numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2

numpy==2.1.2

pandas==2.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

six==1.16.0

tzdata==2024.2

※ pip install pandas 명령을 실행했다.

[PYTHON/PANDAS] Series 클래스 : max 메소드를 사용해 최대값 구하기

■ Series 클래스의 max 메소드를 사용해 최대값을 구하는 방법을 보여준다. ▶ main.py


import pandas as pd

# Three sample ages in a Series labelled "Age".
series = pd.Series(data = [22, 35, 58], name = "Age")

# Largest value held in the Series.
maximumValue = series.max()

print(maximumValue)

"""
58
"""

import pandas as pd

# Three sample ages in a Series labelled "Age".
series = pd.Series([22, 35, 58], name = "Age")

# Largest value held in the Series.
maximumValue = series.max()

print(maximumValue)

# Expected output (the value and the closing quotes were missing in this copy,
# which left the string literal unterminated).
"""

58

"""

▶ requirements.txt


numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2

numpy==2.1.2

pandas==2.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

six==1.16.0

tzdata==2024.2

※ pip install pandas 명령을 실행했다.

[PYTHON/PANDAS] Series 클래스 : 생성자에서 리스트와 name 인자를 사용해 Series 객체 만들기

■ Series 클래스의 생성자에서 리스트와 name 인자를 사용해 Series 객체를 만드는 방법을 보여준다. ▶ main.py


import pandas as pd

# Build a Series from a plain list; name labels the Series as "Age".
series = pd.Series(
    data = [22, 35, 58],
    name = "Age"
)

print(series)

"""
0    22
1    35
2    58
Name: Age, dtype: int64
"""

import pandas as pd

series = pd.Series([22, 35, 58], name = "Age")

print(series)

"""

0 22

1 35

2 58

Name: Age, dtype: int64

"""

▶ requirements.txt


numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2

numpy==2.1.2

pandas==2.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

six==1.16.0

tzdata==2024.2

※ pip install pandas 명령을 실행했다.

[PYTHON/PANDAS] DataFrame 클래스 : 생성자를 사용해 DataFrame 객체 만들기

■ DataFrame 클래스의 생성자를 사용해 DataFrame 객체를 만드는 방법을 보여준다. ▶ main.py


import pandas as pd

# Build a DataFrame from a mapping of column name -> list of column values.
dataFrame = pd.DataFrame(
    data = dict(
        Name = [
            "Braund, Mr. Owen Harris",
            "Allen, Mr. William Henry",
            "Bonnell, Miss. Elizabeth"
        ],
        Age  = [22, 35, 58],
        Sex  = ["male", "male", "female"]
    )
)

print(dataFrame)

"""
                       Name  Age     Sex
0   Braund, Mr. Owen Harris   22    male
1  Allen, Mr. William Henry   35    male
2  Bonnell, Miss. Elizabeth   58  female
"""

import pandas as pd

# Build a DataFrame from a mapping of column name -> list of column values.
# (The closing "]," of the "Name" list was missing in this copy, which made
# the dictionary literal a syntax error.)
dataFrame = pd.DataFrame(
    {
        "Name" : [
            "Braund, Mr. Owen Harris",
            "Allen, Mr. William Henry",
            "Bonnell, Miss. Elizabeth"
        ],
        "Age" : [22, 35, 58],
        "Sex" : ["male", "male", "female"],
    }
)

print(dataFrame)

"""

Name Age Sex

0 Braund, Mr. Owen Harris 22 male

1 Allen, Mr. William Henry 35 male

2 Bonnell, Miss. Elizabeth 58 female

"""

▶ requirements.txt


numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2

numpy==2.1.2

pandas==2.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

six==1.16.0

tzdata==2024.2

※ pip install pandas 명령을 실행했다.

[PYTHON/PANDAS] DataFrame 클래스 : [] 연산자를 사용해 컬럼에 대한 Series 객체 구하기

■ DataFrame 클래스에서 [] 연산자를 사용해 컬럼에 대한 Series 객체를 구하는 방법을 보여준다. ▶ main.py


import pandas as pd
import numpy  as np

# Fixed seed so the randomly generated amounts are reproducible.
np.random.seed(0)

# Three-level category hierarchy: top level -> middle level -> leaf items.
category1List       = ["수익", "비용"]
category2Dictionary = {
    "수익" : ["영업수익", "영업외수익"],
    "비용" : ["영업비용", "영업외비용"]
}
category3Dictionary = {
    "영업수익"   : ["제품매출", "용역매출"],
    "영업외수익" : ["이자수익", "배당금수익"],
    "영업비용"   : ["인건비", "재료비"],
    "영업외비용" : ["이자비용", "기부금"]
}

sourceList = []

def generateAccountCode(대분류, 중분류, 소분류):
    """Return a code made of the first character of each of the three category names."""
    return f"{대분류[:1]}{중분류[:1]}{소분류[:1]}"

# One row per (leaf category, month) with a random target and actual amount.
for category1 in category1List:
    for category2 in category2Dictionary[category1]:
        for category3 in category3Dictionary[category2]:
            accountCode = generateAccountCode(category1, category2, category3)
            for month in range(1, 13):
                # Target is drawn before actual so the seeded sequence is consumed in this order.
                targetAmount = np.random.randint(10000, 100000)
                actualAmount = np.random.randint(8000, 120000)
                sourceList.append(
                    [
                        category1,
                        category2,
                        category3,
                        accountCode,
                        f"2023-{month:02d}",
                        targetAmount,
                        actualAmount
                    ]
                )

dataFrame = pd.DataFrame(
    data    = sourceList,
    columns = ["대분류", "중분류", "소분류", "계정코드", "해당월", "목표금액", "실적금액"]
)

# For every column, fetch its Series with the [] operator and dump the values on one line.
for columnName in dataFrame.columns:
    series = dataFrame[columnName]
    # Separator line followed by the column name on its own line.
    print("-" * 100, columnName, sep = "\n")
    for value in series:
        print(value, end = ",")
    print()

import pandas as pd

import numpy as np

# Fixed seed so the randomly generated amounts are reproducible.
np.random.seed(0)

# Three-level category hierarchy: top level -> middle level -> leaf items.
category1List       = ["수익", "비용"]
category2Dictionary = {"수익" : ["영업수익", "영업외수익"], "비용" : ["영업비용", "영업외비용"]}
category3Dictionary = {
    "영업수익"   : ["제품매출", "용역매출"  ],
    "영업외수익" : ["이자수익", "배당금수익"],
    "영업비용"   : ["인건비"  , "재료비"    ],
    "영업외비용" : ["이자비용", "기부금"    ]
}

sourceList = []

def generateAccountCode(대분류, 중분류, 소분류):
    """Return a code made of the first character of each of the three category names."""
    return f"{대분류[:1]}{중분류[:1]}{소분류[:1]}"

# One row per (leaf category, month) with a random target and actual amount.
# (Indentation was lost in this copy; it is restored here so the code runs.)
for category1 in category1List:
    for category2 in category2Dictionary[category1]:
        for category3 in category3Dictionary[category2]:
            accountCode = generateAccountCode(category1, category2, category3)
            for month in range(1, 13):
                targetAmount = np.random.randint(10000, 100000)
                actualAmount = np.random.randint(8000 , 120000)
                sourceList.append([category1, category2, category3, accountCode, f"2023-{month:02d}", targetAmount, actualAmount])

dataFrame = pd.DataFrame(sourceList, columns = ["대분류", "중분류", "소분류", "계정코드", "해당월", "목표금액", "실적금액"])

# For every column, fetch its Series with the [] operator and dump the values on one line.
for columnName in dataFrame.columns:
    series = dataFrame[columnName]
    print("-" * 100)
    print(columnName)
    for value in series:
        print(value, end = ",", )
    print()

▶ requirements.txt


numpy==2.1.2
pandas==2.2.3
python-dateutil==2.9.0.post0
pytz==2024.2
six==1.16.0
tzdata==2024.2

numpy==2.1.2

pandas==2.2.3

python-dateutil==2.9.0.post0

pytz==2024.2

six==1.16.0

tzdata==2024.2

※ pip install