שימו לב: על מנת להריץ את התאים ב-Live Code, יש לייבא תחילה את ספרית pandas ע”י הרצת השורת הראשונה בתא למטה.
בנוסף, נגביל את מספר השורות והעמודות שתופענה בהדפסת הטבלאות ע”י שורות הקוד השניה והשלישית:
import pandas as pd
pd.options.display.max_rows=5
pd.options.display.max_columns=5
import utils.load_auxilary_files # This will load the files used for this notebook
['orders_2_2024.csv', 'erosion.png', 'infile1.txt', 'countries-of-the-world.csv', 'monday.txt', 'StudentsGrades.csv', 'notebook_resources.zip', 'erosion_2.png', '__MACOSX', 'dog_noised.png', 'foods.txt', 'witcher_2.csv', 'dialation.png', 'evil_morty_change.png', 'ship.png', 'products2.csv', 'ex1.csv', 'evil_morty_change_3.png', 'evil_morty_1.png', 'baby.png', 'orders_3_2024.csv', 'koala.png', 'woman_noised.png', 'infile2.txt', 'orders_4_2024.csv', 'dog.png', 'witcher_1.csv', 'evil_morty_segmentation.png', 'out.txt', 'sunday.txt', 'ex2.csv', 'evil_morty_change_noised.png', 'dialation_2.png']
שאלות ממבחני עבר#
E.
def read_in_range(start_month,start_year,end_month,end_year):
cur_month=start_month
cur_year=start_year
dfs=[]
while cur_year < end_year or (cur_year == end_year and cur_month <= end_month):
dfs.append(pd.read_csv("files/orders_"+str(cur_month)+"_"+str(cur_year)+".csv")) # The "files/" prefix in the path is due to technical reasons, and should be ignored.
cur_month+=1
if cur_month>12:
cur_year+=1
cur_month=1
return pd.concat(dfs, ignore_index=True)
Read products and order tables
df_p=pd.read_csv("files/products2.csv") # The "files/" prefix in the path is due to technical reasons, and should be ignored.
df_o=read_in_range(2,2024,4,2024)
F.
display(df_o)
df_o.fillna(df_o.median(),inplace=True)
display(df_o)
| Espresso | Americano | Sandwich | Salad | |
|---|---|---|---|---|
| 0 | 0 | 1.0 | 0 | 1.0 |
| 1 | 1 | 1.0 | 0 | NaN |
| ... | ... | ... | ... | ... |
| 7 | 1 | 5.0 | 0 | 0.0 |
| 8 | 2 | 0.0 | 5 | 0.0 |
9 rows × 4 columns
| Espresso | Americano | Sandwich | Salad | |
|---|---|---|---|---|
| 0 | 0 | 1.0 | 0 | 1.0 |
| 1 | 1 | 1.0 | 0 | 0.5 |
| ... | ... | ... | ... | ... |
| 7 | 1 | 5.0 | 0 | 0.0 |
| 8 | 2 | 0.0 | 5 | 0.0 |
9 rows × 4 columns
G.
df_p.index=df_p['Name'] # First line is missing
df_join=pd.concat((df_p,df_o.T), axis=1)
df_join
| Name | Price | ... | 7 | 8 | |
|---|---|---|---|---|---|
| Sandwich | Sandwich | 30 | ... | 0.0 | 5.0 |
| Salad | Salad | 40 | ... | 0.0 | 0.0 |
| Espresso | Espresso | 8 | ... | 1.0 | 2.0 |
| Americano | Americano | 11 | ... | 5.0 | 0.0 |
4 rows × 13 columns
H.
profit=df_join.iloc[:,4:].sum(axis=1)*( df_join['Price']- df_join['Cost']) # "axis of sum should be 1"
profit
Sandwich 120.0
Salad 110.0
Espresso 42.0
Americano 75.0
dtype: float64
I.
df_p['Gain']=df_o.sum()*(df_p['Price']-df_p['Cost'])
df_p.groupby('Vendor')['Gain'].sum().idxmin()
'LIV'