/*************************************
 This command file shows how to carry out
 basic data management tasks using SAS.

 filename: datamgt.sas

 Inputs : raw file "marflt.dat" (fixed-column flight records)
          and the permanent library sasdata2.
 Outputs: a series of WORK datasets plus printed reports.
**************************************/

OPTIONS FORMCHAR="|----|+|---+=|-/\<>*";
options pageno=1;
title;

/*Read the raw fixed-column flight data*/
data march;
   infile "marflt.dat";
   input flight     1-3
         @4  date   mmddyy6.
         @10 time   time5.
         orig     $ 15-17
         dest     $ 18-20
         @21 miles  comma5.
         mail       26-29
         freight    30-33
         boarded    34-36
         transfer   37-39
         nonrev     40-42
         deplane    43-45
         capacity   46-48;
   format date mmddyy10. time time5. miles comma5.;
run;

/*Make a copy of a dataset*/
data newmarch;
   set march;
run;

libname sasdata2 V9 "C:\Users\kwelch\Desktop\sasdata2";

data sasdata2.business2;
   set sasdata2.business;
run;

/*Create a subset of data*/
/*A subsetting IF keeps only the rows for which the condition is true*/
data march15;
   set march;
   if date = "15MAR1990"D;
run;

data london;
   set march;
   if dest="LON";
run;

data longflt;
   set march;
   if miles >=1000;
run;

/*Wrong way to create new variables in a subset*/
/*(kept on purpose as a counter-example: the explicit OUTPUT writes the
   row BEFORE totpassngrs and pctfull are assigned, so both variables
   are missing in the output dataset)*/
data london_latemarch;
   set march;
   if dest="LON" and date >="15MAR1990"D then output;
   totpassngrs = boarded + transfer + nonrev;
   pctfull = (totpassngrs/capacity)*100;
run;

/*Right way to create new variables in a subset*/
/*(compute the new variables first, then OUTPUT the selected rows)*/
data london_latemarch2;
   set march;
   totpassngrs = boarded + transfer + nonrev;
   pctfull = (totpassngrs/capacity)*100;
   if dest="LON" and date >="15MAR1990"D then output;
run;

/*Delete cases from a dataset*/
data shortflt;
   set march;
   if miles >=1000 then delete;
   if date=. then delete;
run;

/*Drop or keep variables*/
data march_passngrs;
   set march;
   keep date time orig dest miles boarded transfer nonrev deplane capacity;
run;

data march_passngrs2;
   set march;
   drop mail freight;
run;

/*Transformations and recodes*/
data march_recode;
   set march;

   /* "+" returns missing if any operand is missing ... */
   totpassngrs = boarded + transfer + nonrev;
   empty_seats = capacity - totpassngrs;
   totnonpass  = mail + freight;
   pctfull     = (totpassngrs/capacity)*100;

   /* log() is undefined for values <= 0; guarding the call leaves
      logpassngrs missing in that case without raising invalid-argument
      notes in the log */
   if totpassngrs > 0 then logpassngrs = log(totpassngrs);

   /* ... whereas sum() ignores missing arguments */
   totpassngrs2 = sum(boarded,transfer,nonrev);

   /* Truncate the percentage itself. Truncating the ratio before
      multiplying by 100 would only ever give 0 or 100 for ordinary flights. */
   int_pctfull = int((totpassngrs/capacity)*100);
   rnd_pctfull = round(pctfull,.1);

   /* Dummy variable for completely full flights; stays missing when
      pctfull is missing.
      NOTE(review): flights above 100 percent are coded 0 - confirm intent. */
   if pctfull = . then full_flight = .;
   else if pctfull = 100 then full_flight = 1;
   else full_flight = 0;

   /* Quartile-style categories; the missing check is required because
      SAS treats a numeric missing value as smaller than any number */
   if pctfull not=. then do;
      if pctfull < 25 then full_cat = 1;
      else if pctfull < 50 then full_cat = 2;
      else if pctfull < 75 then full_cat = 3;
      else full_cat = 4;
   end;

   /* US vs non-US destination; any other destination leaves USA missing */
   if dest in("CPH", "FRA", "LON", "PAR", "YYZ") then USA = 0;
   else if dest in("DFW", "LAX", "ORD", "WAS") then USA = 1;
run;

title "Check New Variables";
proc means data=march_recode;
run;

proc freq data=march_recode;
   tables full_flight full_cat dest USA;
run;

/*Sort Cases*/
proc sort data=march_recode;
   by USA;
run;

title "Descriptive Statistics by US vs Non-US Destinations";
proc means data=march_recode;
   by USA;
run;

/*Sort cases by more than one variable*/
proc sort data=march_recode;
   by date USA;
run;

title "Descriptive Statistics by Date and Destination";
proc means data=march_recode;
   by date USA;
run;

/*Using the tagsort option*/
proc sort data=march_recode tagsort;
   by date dest;
run;

/*Create a new sorted dataset*/
proc sort data=march_recode out=sortdat;
   by totpassngrs;
run;

/*Get rid of duplicates*/
proc sort data=march_recode out=sortdat2 nodupkey;
   by date dest;
run;

proc sort data=march_recode out=sortdat3 noduprec;
   by date dest;
run;

/*Select cases for analysis*/
title "Flights to Los Angeles";
proc print data=march_recode;
   where dest = "LAX";
   var flight dest totpassngrs;
run;

title "Missing Destination";
proc print data=march_recode;
   where dest = " ";
   var flight dest totpassngrs;
run;

/*Without a missing-value check: rows with missing pctfull are also
  printed, because a numeric missing compares lower than any number*/
title "Flights Less than 30 Percent Full";
proc print data=march_recode;
   where pctfull < 30;
   var dest date pctfull;
run;

/*Same report with missing pctfull excluded*/
title "Flights Less than 30 Percent Full";
proc print data=march_recode;
   where pctfull not=. and pctfull < 30;
   var dest date pctfull;
run;

title "Flights Between 25 and 35 Percent Full";
proc print data=march_recode;
   where pctfull between 25 and 35;
run;

title "Cases Where Number of Passengers is Missing";
proc print data=march_recode;
   where totpassngrs = .;
   var flight dest totpassngrs;
run;

/*Exclude missing pctfull so that only flights with a known load
  factor below 60 percent are listed*/
title "Flights less than 60 percent full to London";
proc print data=march_recode;
   where (pctfull not=.) and (pctfull < 60) and (dest="LON");
   var flight dest totpassngrs capacity pctfull;
run;

/*Four-digit years keep date constants independent of YEARCUTOFF=*/
title "Flights on March 7th, 1990";
proc print data=march_recode;
   where date = "07MAR1990"D;
run;

/*Upper bound matches the date range stated in the title*/
title "Flights March 7th to March 9th , 1990";
proc print data=march_recode;
   where date between "07MAR1990"D and "09MAR1990"D;
run;

title "Cases with Missing Date";
proc print data=march_recode;
   where date = .;
   var flight dest date;
run;