/********************************************** THIS COMMAND FILE SHOWS HOW TO COMBINE SAS DATASETS, BY BOTH APPENDING, AND BY MERGING. FILENAME: COMBINE.SAS ***********************************************/ title; options nodate; OPTIONS FORMCHAR="|----|+|---+=|-/\<>*"; /*CREATE BOYS AND GIRLS DATA SETS*/ data boys; input name $ sex $ age height teacher $; cards; Tom M 12 62 Smith Bob M 13 57 Green Joe M 11 59 Green Harry M 12 53 Green William M 13 60 Smith John M 11 57 Smith Richard M 11 55 Green ; title "Boys Dataset"; proc contents data=boys varnum; run; data girls; length name $ 10; input name $ age sex $ teacher $; cards; Sharice 13 F Smith Mary 12 F Smith Ellen 11 F Green Carol 11 F Green Chris 13 F Smith Claire 12 F Green Raye 13 F Smith Wilhelmina 12 F Green ; title "Girls Data"; proc contents data=girls varnum; run; proc print data=girls; run; /*APPEND DATA SETS USING THE SET STATEMENT*/ data allkids; length name $ 10; set boys girls; run; title "printout of allkids dataset"; title2 "with boys first in the data set"; proc print data = allkids; run; proc contents data=allkids varnum; run; /*SWITCH THE ORDER OF THE TWO DATA SETS*/ data allkids2; set girls boys; run; title "printout of allkids2 dataset"; title2 "with girls first in the data set"; proc print data = allkids2; run; **********************************************************; *Merging datasets that have different variables; **********************************************************; data exam; input id examdate mmddyy10. sex age height weight sbp dbp; format examdate mmddyy10.; cards; 1 10/18/2000 1 25 72 156 128 89 2 05/29/2000 1 33 68 168 145 96 3 02/21/2000 1 47 65 182 152 98 4 06/17/2000 1 29 69 190 139 91 5 01/11/2000 2 37 62 129 145 93 6 08/15/2000 2 42 64 156 133 94 ; data lab; input id hgb; cards; 1 13.2 4 12.1 3 14.5 6 12.8 12 13.0 ; proc sort data=exam; by id; run; proc sort data=lab; by id; run; data exam_lab; merge exam lab; by id; run; title "Printout of Exam_lab Data Set"; proc print; run; /*INCLUDE ONLY CASES THAT ARE IN BOTH DATA SETS*/ data exam_lab2; merge exam(in=a) lab(in=b); by id; if a and b; run; title "Exam_lab2 Data Set Includes Only Those"; title2 "In Both Data Sets"; proc print data=exam_lab2; run; /*INCLUDE CASES THAT ARE IN EXAM, WHETHER THEY ARE IN LAB OR NOT*/ data exam_lab3; merge exam(in=a) lab(in=b); by id; if a; run; title "Exam_lab3 Data Set Includes Those"; title2 "In Exam Data, Regardless of Lab Data"; proc print data=exam_lab3; run; /*INCLUDE CASES FROM LAB, WHETHER THEY ARE IN EXAM OR NOT*/ data exam_lab4; merge exam(in=a) lab(in=b); by id; if b; run; title "Exam_lab4 Data Set Includes Those"; title2 "In Lab Data, Regardless of Exam Data"; proc print data=exam_lab4; run; **********************************************************; *MERGING DATA SETS THAT HAVE THE SAME VARIABLE NAMES **********************************************************; data oldsal; input name $ idnum sex $ age salary jobcat year; cards; Roger 518 M 45 7677 2 1989 Martha 321 F 28 5000 1 1989 Zeke 444 M 33 6075 1 1989 Barb 1728 F 40 9023 2 1989 Bill 993 M 36 7739 3 1989 Sandy 1002 F 29 6161 3 1989 ; data newsal; input name $ idnum salary jobcat year; cards; Hank 108 11138 1 1995 Fred 519 10035 2 1995 Zeke 444 9697 1 1995 Martha 321 7987 2 1995 Sandy 1002 6995 2 1995 Bill 993 12400 3 1995 Roxy 773 10119 2 1995 ; /*merging by idnum*/ proc sort data=oldsal; by idnum; run; proc sort data=newsal; by idnum; run; data combine1; merge oldsal(rename=(salary=salary89 jobcat=jobcat89)) newsal(rename=(salary=salary95 jobcat=jobcat95)); by idnum; drop year; run; title "printout of combine1 data set"; title2 "matching by id number"; title3 "all cases that were in either data set are included"; proc print data=combine1; run; /*merging by idnum, but keeping only cases that are in both datasets*/ data combine2; merge oldsal(in=a rename=(salary=salary89 jobcat=jobcat89)) newsal(in=b rename=(salary=salary95 jobcat=jobcat95)); by idnum; if a and b; totsal = sum (salary89,salary95); format salary89 salary95 totsal dollar12.; drop year; run; title "printout of combine2 data set"; title2 "matching by id number"; title3 "and only including cases that are in both data sets"; proc print data=combine2; run; /*Merge data from a Table with data having multiple obs per group*/ PROC IMPORT OUT= WORK.AgeStudy DATAFILE= "AgeStudy.xls" DBMS=EXCEL REPLACE; RANGE="Sheet1$"; GETNAMES=YES; MIXED=NO; SCANTEXT=YES; USEDATE=YES; SCANTIME=YES; RUN; title "Study Data"; proc contents data=agestudy; run; proc print data=agestudy; run; /*Import the Census Data*/ PROC IMPORT OUT= WORK.Census DATAFILE= "MI_Census_2000.xls" DBMS=EXCEL REPLACE; RANGE="Sheet1$"; GETNAMES=YES; MIXED=NO; SCANTEXT=YES; USEDATE=YES; SCANTIME=YES; RUN; title "Census Data"; proc contents data=census; run; proc print data=census(obs=10); run; /*Now, merge the two datasets, but keep only cases that occur in the Age Study*/ proc sort data=census; by county; run; proc sort data=agestudy; by county; run; data study_census; length county $ 16; merge agestudy(in=instudy) census(in=incensus); by county; if instudy; run; title "Printout of Merged Data"; proc print data=study_census; format Census_2000 comma12.; run;