From 6c921d998317cd347436e0afc37b1654401ffb27 Mon Sep 17 00:00:00 2001 From: Dominic Ricottone Date: Wed, 29 Mar 2023 23:03:58 -0500 Subject: [PATCH] Initial commit A more-or-less complete replication of the Census Report's Chicago profile. (Issues are noted in the top-level README.) --- README.md | 44 ++ data/.gitignore | 4 + data/README.md | 115 ++++ src/01_Read.sas | 1443 +++++++++++++++++++++++++++++++++++++++ src/02_Prepare.sas | 842 +++++++++++++++++++++++ src/03_Demographics.sas | 42 ++ src/04_Economics.sas | 73 ++ src/05_Families.sas | 57 ++ src/06_Housing.sas | 56 ++ src/07_Social.sas | 81 +++ 10 files changed, 2757 insertions(+) create mode 100644 README.md create mode 100644 data/.gitignore create mode 100644 data/README.md create mode 100644 src/01_Read.sas create mode 100644 src/02_Prepare.sas create mode 100644 src/03_Demographics.sas create mode 100644 src/04_Economics.sas create mode 100644 src/05_Families.sas create mode 100644 src/06_Housing.sas create mode 100644 src/07_Social.sas diff --git a/README.md b/README.md new file mode 100644 index 0000000..ed086aa --- /dev/null +++ b/README.md @@ -0,0 +1,44 @@ +# Chicago + +A demographic and socioeconomic profile of Chicago, +replicating the one displayed on +[Census Reporter](https://censusreporter.org/profiles/16000US1714000-chicago-il/). +Notably that portal uses ACS 1 year estimates, while 5 year estimates for +2021 are available and generally considered more reliable. + + +## Data + +See the `data` directory for more information on obtaining data for this +project. I will not distribute data with this repository. + + +## Analysis + +See the `src` directory for the source code used here. + + +## Issues and Notes + +The continuous data for when a respondent moved into their current housing unit +is not publicly available, so I cannot recreate that table/chart. 
+ +I do not presently understand how to replicate *Number of housing units*, +or perhaps more accurately I don't understand how this quantity differs from +*Number of households* in any meaningful, Census-related way. + +Something is wrong with the official profile's tabulation of house values. +(See *Value of owner-occupied housing units*). +The easiest way to demonstrate this is to look at the top bucket. +The *Over $1M* item's total is 14,010. +Following the link to Table B25075, however, reveals that 14,010 is the total +for just *$1,000,000 to $1,499,999*. +Potentially this is a matter of topcoding policy differing between the two +backends? + +The pandemic caused a few rapid changes to demography. +It took me a while to convince myself that my recreation was not in error. +The most notable table is *Means of transportation*, especially as seen in the +*Worked at home* item. +But other Census Reporter tables corroborate my numbers. + diff --git a/data/.gitignore b/data/.gitignore new file mode 100644 index 0000000..c2d1fec --- /dev/null +++ b/data/.gitignore @@ -0,0 +1,4 @@ +*.dat +*.cbk +*.sas + diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..8ef2189 --- /dev/null +++ b/data/README.md @@ -0,0 +1,115 @@ +# ACS 2021 PUMS + + +## IPUMS + +The ACS databases used here were created and maintained by IPUMS USA, +University of Minnesota. Credit for the data belongs with: + + Steven Ruggles, Sarah Flood, Matthew Sobek, Danika Brockman, Grace Cooper, Stephanie Richards, and Megan Schouweiler. IPUMS USA: Version 13.0 [dataset]. Minneapolis, MN: IPUMS, 2023. +https://doi.org/10.18128/D010.V13.0 + + Steven Ruggles, Catherine A. Fitch, Ronald Goeken, J. David Hacker, Matt A. Nelson, Evan Roberts, Megan Schouweiler and Matthew Sobek. IPUMS Ancestry Full Count Data: Version 3.0 [dataset]. Minneapolis, MN: IPUMS, 2021. 
+https://doi.org/10.18128/D014.V3.0 + + +## Household Records + +Download a hierarchical dataset from IPUMS containing these variables: + + STATEFIP + + PUMA + + REPWT + + ADJHSG + + NP + + TEN + + VACS + + VALP + + SVAL + + BLD + +It should use the ACS 2021 5-year sample. + +Optionally, subset the extraction to just Illinois cases (based on `STATEFIP`). +I subset the cases later anyway. + +Also collect the SAS read instructions and the basic codebook. + +```bash +$ gunzip usa_00006.dat.gz +$ wc -l usa_00006.dat +293143 usa_00006.dat +``` + +Now I manipulate the codebook into something useful for `awk(1)`. + +```bash +$ sed -e '11,110!d' usa_00006.cbk | awk '{ print $4 }' | xargs echo +1 4 4 6 8 13 10 1 13 2 5 12 1 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 2 2 1 1 7 1 +``` + +Examine the `STATEFIP` field. If I hadn't subset cases in the IPUMS portal, +I would do that now. + +```bash +$ awk 'BEGIN {FIELDWIDTHS="1 4 4 6 8 13 10 1 13 2 5 12 1 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 2 2 1 1 7 1"} {if ($1=="H"){print $10}}' usa_00006.dat | sort -n | uniq -c + 293143 17 +``` + +Now subset cases based on the `PUMA` field. + +```bash +$ awk 'BEGIN {FIELDWIDTHS="1 4 4 6 8 13 10 1 13 2 5 12 1 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 2 2 1 1 7 1"} $11~/035(0[1234]|2[0123456789]|3[012])/' usa_00006.dat >usa_00006_chi.dat +$ wc -l usa_00006_chi.dat +52097 usa_00006_chi.dat +``` + +With this, I have a household data file that is restricted to just Chicago.
+ + +## Person Records + +Download a rectangular dataset from IPUMS containing these variables: + + STATEFIP + + PUMA + + REPWTP + + HHT + + ADJINC + + AGEP + + FER + + JWMNP + + JWRIP + + JWTRNS + + LANX + + MAR + + MIG + + MLPA + + MLPB + + MLPCD + + MLPE + + MLPFG + + MLPH + + MLPJ + + SCHL + + SEX + + HISP + + LANP + + MIGPUMA + + MIGSP + + NATIVITY + + PINCP + + POBP + + POVPIP + + RAC1P + +Follow all the same preparation steps as with the household extraction. + +```bash +$ wc -l usa_00007.dat +621164 usa_00007.dat +$ sed -e '11,132!d' usa_00007.cbk | awk '{ print $4 }' | xargs echo +4 4 6 8 13 10 13 2 5 12 1 1 4 10 1 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 2 1 3 2 2 1 1 1 1 1 1 1 1 1 1 2 1 2 4 5 3 1 7 3 3 1 +$ awk 'BEGIN {FIELDWIDTHS="4 4 6 8 13 10 13 2 5 12 1 1 4 10 1 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 2 1 3 2 2 1 1 1 1 1 1 1 1 1 1 2 1 2 4 5 3 1 7 3 3 1"} {print $8}' usa_00007.dat | sort -n | uniq -c + 621164 17 +$ awk 'BEGIN {FIELDWIDTHS="4 4 6 8 13 10 13 2 5 12 1 1 4 10 1 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 2 1 3 2 2 1 1 1 1 1 1 1 1 1 1 2 1 2 4 5 3 1 7 3 3 1"} $9~/035(0[1234]|2[0123456789]|3[012])/' usa_00007.dat >usa_00007_chi.dat +$ wc -l usa_00007_chi.dat +101501 usa_00007_chi.dat +``` + diff --git a/src/01_Read.sas b/src/01_Read.sas new file mode 100644 index 0000000..631be0a --- /dev/null +++ b/src/01_Read.sas @@ -0,0 +1,1443 @@ +/* + This is a vendored copy of two import syntax files generated by IPUMS.
- librefs and filenames are updated + - duplicative formats are removed + - scalar formats are removed + - added some indentation +*/ + +/* NOTE: updated references */ +libname IPUMS "~"; +filename dat_h "~/usa_00006_chi.dat"; +filename dat_p "~/usa_00007_chi.dat"; + +proc format cntlout = IPUMS.raw_h_f; /* NOTE: value labels for the household file; cntlout= also stores these definitions in dataset IPUMS.raw_h_f */ + + /* NOTE: killed YEAR_f format */ + /* NOTE: killed SAMPLE_f format */ + + value REPWT_f + 0 = "Repwt not available" + 1 = "Repwt available" + ; + + value STATEFIP_f + 01 = "Alabama" + 02 = "Alaska" + 04 = "Arizona" + 05 = "Arkansas" + 06 = "California" + 08 = "Colorado" + 09 = "Connecticut" + 10 = "Delaware" + 11 = "District of Columbia" + 12 = "Florida" + 13 = "Georgia" + 15 = "Hawaii" + 16 = "Idaho" + 17 = "Illinois" + 18 = "Indiana" + 19 = "Iowa" + 20 = "Kansas" + 21 = "Kentucky" + 22 = "Louisiana" + 23 = "Maine" + 24 = "Maryland" + 25 = "Massachusetts" + 26 = "Michigan" + 27 = "Minnesota" + 28 = "Mississippi" + 29 = "Missouri" + 30 = "Montana" + 31 = "Nebraska" + 32 = "Nevada" + 33 = "New Hampshire" + 34 = "New Jersey" + 35 = "New Mexico" + 36 = "New York" + 37 = "North Carolina" + 38 = "North Dakota" + 39 = "Ohio" + 40 = "Oklahoma" + 41 = "Oregon" + 42 = "Pennsylvania" + 44 = "Rhode Island" + 45 = "South Carolina" + 46 = "South Dakota" + 47 = "Tennessee" + 48 = "Texas" + 49 = "Utah" + 50 = "Vermont" + 51 = "Virginia" + 53 = "Washington" + 54 = "West Virginia" + 55 = "Wisconsin" + 56 = "Wyoming" + 61 = "Maine-New Hampshire-Vermont" + 62 = "Massachusetts-Rhode Island" + 63 = "Minnesota-Iowa-Missouri-Kansas-Nebraska-S.Dakota-N.Dakota" + 64 = "Maryland-Delaware" + 65 = "Montana-Idaho-Wyoming" + 66 = "Utah-Nevada" + 67 = "Arizona-New Mexico" + 68 = "Alaska-Hawaii" + 72 = "Puerto Rico" + 97 = "Military/Mil.
Reservation" + 99 = "State not identified" + ; + + value GQ_f + 0 = "Vacant unit" + 1 = "Households under 1970 definition" + 2 = "Additional households under 1990 definition" + 3 = "Group quarters--Institutions" + 4 = "Other group quarters" + 5 = "Additional households under 2000 definition" + 6 = "Fragment" + ; + + /* NOTE: killed US2021C_ADJHSG_f format */ + + value $ US2021C_NP_f + "00" = "Vacant unit" + "01" = "One person in household or any person in group quarters" + "10" = "[no label]" + "11" = "[no label]" + "12" = "[no label]" + "13" = "[no label]" + "14" = "[no label]" + "15" = "[no label]" + "16" = "[no label]" + "17" = "[no label]" + "18" = "[no label]" + "19" = "[no label]" + "20" = "[no label]" + "B0" = "[no label]" + "B1" = "[no label]" + "B2" = "[no label]" + "B3" = "[no label]" + "B4" = "[no label]" + "B5" = "[no label]" + "B6" = "[no label]" + "B7" = "[no label]" + "B8" = "[no label]" + "B9" = "[no label]" + "02" = "[no label]" + "03" = "[no label]" + "04" = "[no label]" + "05" = "[no label]" + "06" = "[no label]" + "07" = "[no label]" + "08" = "[no label]" + "09" = "[no label]" + ; + + value $ US2021C_BLD_f + "BB" = "N/A (GQ)" + "01" = "Mobile home or trailer" + "02" = "One-family house detached" + "03" = "One-family house attached" + "04" = "2 Apartments" + "05" = "3-4 Apartments" + "06" = "5-9 Apartments" + "07" = "10-19 Apartments" + "08" = "20-49 Apartments" + "09" = "50 or more apartments" + "10" = "Boat, RV, van, etc."
+ ; + + value $ US2021C_TEN_f + "B" = "N/A (GQ/vacant)" + "1" = "Owned with mortgage or loan (include home equity loans)" + "2" = "Owned free and clear" + "3" = "Rented" + "4" = "Occupied without payment of rent" + ; + + value $ US2021C_VACS_f + "B" = "N/A (GQ/occupied)" + "1" = "For rent" + "2" = "Rented, not occupied" + "3" = "For sale only" + "4" = "Sold, not occupied" + "5" = "For seasonal/recreational/occasional use" + "6" = "For migrant workers" + "7" = "Other vacant" + ; + + value $ US2021C_SVAL_f + "B" = "N/A (GQ/ vacant units, except 'for-sale-only' and 'sold, not occupied'/not owned or being bought)" + "0" = "A single family home on 10 or more acres or any other type of building, including mobile homes, with" + " no regard to acreage." + "1" = "A single family home on less than 10 acres." + ; /* NOTE(review): the "0" label above is split across two adjacent quoted strings; confirm the stored label is complete */ + +run; + +data IPUMS.raw_h; /* NOTE: household dataset, read from the dat_h fileref */ + infile dat_h pad missover lrecl=581; /* NOTE: fixed-column records, 581 columns wide */ + + input + RECTYPE $ 1-1 @ + ; /* NOTE: trailing @ holds the record so it can be re-read once its type is known */ + + if RECTYPE = "H" then do; /* NOTE: only records flagged "H" are fully read and written out */ + input + RECTYPE $ 1-1 + YEAR 2-5 + MULTYEAR 6-9 + SAMPLE 10-15 + SERIAL 16-23 + CBSERIAL 24-36 + HHWT 37-46 .2 + REPWT 47-47 + CLUSTER 48-60 + STATEFIP 61-62 + PUMA 63-67 + STRATA 68-79 + GQ 80-80 + REPWT1 81-86 + REPWT2 87-92 + REPWT3 93-98 + REPWT4 99-104 + REPWT5 105-110 + REPWT6 111-116 + REPWT7 117-122 + REPWT8 123-128 + REPWT9 129-134 + REPWT10 135-140 + REPWT11 141-146 + REPWT12 147-152 + REPWT13 153-158 + REPWT14 159-164 + REPWT15 165-170 + REPWT16 171-176 + REPWT17 177-182 + REPWT18 183-188 + REPWT19 189-194 + REPWT20 195-200 + REPWT21 201-206 + REPWT22 207-212 + REPWT23 213-218 + REPWT24 219-224 + REPWT25 225-230 + REPWT26 231-236 + REPWT27 237-242 + REPWT28 243-248 + REPWT29 249-254 + REPWT30 255-260 + REPWT31 261-266 + REPWT32 267-272 + REPWT33 273-278 + REPWT34 279-284 + REPWT35 285-290 + REPWT36 291-296 + REPWT37 297-302 + REPWT38 303-308 + REPWT39 309-314 + REPWT40 315-320 + REPWT41 321-326 + REPWT42 327-332 + REPWT43 333-338 + REPWT44 339-344 + REPWT45 345-350 + REPWT46 351-356 + REPWT47 357-362 + REPWT48 363-368 +
REPWT49 369-374 + REPWT50 375-380 + REPWT51 381-386 + REPWT52 387-392 + REPWT53 393-398 + REPWT54 399-404 + REPWT55 405-410 + REPWT56 411-416 + REPWT57 417-422 + REPWT58 423-428 + REPWT59 429-434 + REPWT60 435-440 + REPWT61 441-446 + REPWT62 447-452 + REPWT63 453-458 + REPWT64 459-464 + REPWT65 465-470 + REPWT66 471-476 + REPWT67 477-482 + REPWT68 483-488 + REPWT69 489-494 + REPWT70 495-500 + REPWT71 501-506 + REPWT72 507-512 + REPWT73 513-518 + REPWT74 519-524 + REPWT75 525-530 + REPWT76 531-536 + REPWT77 537-542 + REPWT78 543-548 + REPWT79 549-554 + REPWT80 555-560 + US2021C_ADJHSG 561-567 .6 /* NOTE: changed informat from string to 8.6 numeric */ + US2021C_NP $ 568-569 + US2021C_BLD $ 570-571 + US2021C_TEN $ 572-572 + US2021C_VACS $ 573-573 + US2021C_VALP $ 574-580 + US2021C_SVAL $ 581-581 + ; + output; + end; + + label + RECTYPE = "Record type" + YEAR = "Census year" + MULTYEAR = "Actual year of survey, multi-year ACS/PRCS" + SAMPLE = "IPUMS sample identifier" + SERIAL = "Household serial number" + CBSERIAL = "Original Census Bureau household serial number" + HHWT = "Household weight" + REPWT = "Household replicate weights [80 variables]" + CLUSTER = "Household cluster for variance estimation" + STATEFIP = "State (FIPS code)" + PUMA = "Public Use Microdata Area" + STRATA = "Household strata for variance estimation" + GQ = "Group quarters status" + REPWT1 = "Household replicate weight 1" + REPWT2 = "Household replicate weight 2" + REPWT3 = "Household replicate weight 3" + REPWT4 = "Household replicate weight 4" + REPWT5 = "Household replicate weight 5" + REPWT6 = "Household replicate weight 6" + REPWT7 = "Household replicate weight 7" + REPWT8 = "Household replicate weight 8" + REPWT9 = "Household replicate weight 9" + REPWT10 = "Household replicate weight 10" + REPWT11 = "Household replicate weight 11" + REPWT12 = "Household replicate weight 12" + REPWT13 = "Household replicate weight 13" + REPWT14 = "Household replicate weight 14" + REPWT15 = "Household
replicate weight 15" + REPWT16 = "Household replicate weight 16" + REPWT17 = "Household replicate weight 17" + REPWT18 = "Household replicate weight 18" + REPWT19 = "Household replicate weight 19" + REPWT20 = "Household replicate weight 20" + REPWT21 = "Household replicate weight 21" + REPWT22 = "Household replicate weight 22" + REPWT23 = "Household replicate weight 23" + REPWT24 = "Household replicate weight 24" + REPWT25 = "Household replicate weight 25" + REPWT26 = "Household replicate weight 26" + REPWT27 = "Household replicate weight 27" + REPWT28 = "Household replicate weight 28" + REPWT29 = "Household replicate weight 29" + REPWT30 = "Household replicate weight 30" + REPWT31 = "Household replicate weight 31" + REPWT32 = "Household replicate weight 32" + REPWT33 = "Household replicate weight 33" + REPWT34 = "Household replicate weight 34" + REPWT35 = "Household replicate weight 35" + REPWT36 = "Household replicate weight 36" + REPWT37 = "Household replicate weight 37" + REPWT38 = "Household replicate weight 38" + REPWT39 = "Household replicate weight 39" + REPWT40 = "Household replicate weight 40" + REPWT41 = "Household replicate weight 41" + REPWT42 = "Household replicate weight 42" + REPWT43 = "Household replicate weight 43" + REPWT44 = "Household replicate weight 44" + REPWT45 = "Household replicate weight 45" + REPWT46 = "Household replicate weight 46" + REPWT47 = "Household replicate weight 47" + REPWT48 = "Household replicate weight 48" + REPWT49 = "Household replicate weight 49" + REPWT50 = "Household replicate weight 50" + REPWT51 = "Household replicate weight 51" + REPWT52 = "Household replicate weight 52" + REPWT53 = "Household replicate weight 53" + REPWT54 = "Household replicate weight 54" + REPWT55 = "Household replicate weight 55" + REPWT56 = "Household replicate weight 56" + REPWT57 = "Household replicate weight 57" + REPWT58 = "Household replicate weight 58" + REPWT59 = "Household replicate weight 59" + REPWT60 = "Household replicate weight
60" + REPWT61 = "Household replicate weight 61" + REPWT62 = "Household replicate weight 62" + REPWT63 = "Household replicate weight 63" + REPWT64 = "Household replicate weight 64" + REPWT65 = "Household replicate weight 65" + REPWT66 = "Household replicate weight 66" + REPWT67 = "Household replicate weight 67" + REPWT68 = "Household replicate weight 68" + REPWT69 = "Household replicate weight 69" + REPWT70 = "Household replicate weight 70" + REPWT71 = "Household replicate weight 71" + REPWT72 = "Household replicate weight 72" + REPWT73 = "Household replicate weight 73" + REPWT74 = "Household replicate weight 74" + REPWT75 = "Household replicate weight 75" + REPWT76 = "Household replicate weight 76" + REPWT77 = "Household replicate weight 77" + REPWT78 = "Household replicate weight 78" + REPWT79 = "Household replicate weight 79" + REPWT80 = "Household replicate weight 80" + US2021C_ADJHSG = "Adjustment factor for housing dollar amounts (6 implied decimal places)" + US2021C_NP = "Number of persons in this household" + US2021C_BLD = "Units in structure" + US2021C_TEN = "Tenure" + US2021C_VACS = "Vacancy status" + US2021C_VALP = "Property value" + US2021C_SVAL = "Specified owner unit" + ; + + format + REPWT REPWT_f. + STATEFIP STATEFIP_f. + GQ GQ_f. + US2021C_NP US2021C_NP_f. + US2021C_BLD US2021C_BLD_f. + US2021C_TEN US2021C_TEN_f. + US2021C_VACS US2021C_VACS_f. + US2021C_SVAL US2021C_SVAL_f. + ; + + format + CBSERIAL 13. + HHWT 11.2 + CLUSTER 13. + STRATA 12.
+ ; + +run; + +proc format cntlout = IPUMS.raw_p_f; /* NOTE: value labels for the person file, also stored in dataset IPUMS.raw_p_f; STATEFIP_f and GQ_f are defined again with the same values as in the household step */ + + /* NOTE: killed YEAR_f format */ + /* NOTE: killed SAMPLE_f format */ + + value STATEFIP_f + 01 = "Alabama" + 02 = "Alaska" + 04 = "Arizona" + 05 = "Arkansas" + 06 = "California" + 08 = "Colorado" + 09 = "Connecticut" + 10 = "Delaware" + 11 = "District of Columbia" + 12 = "Florida" + 13 = "Georgia" + 15 = "Hawaii" + 16 = "Idaho" + 17 = "Illinois" + 18 = "Indiana" + 19 = "Iowa" + 20 = "Kansas" + 21 = "Kentucky" + 22 = "Louisiana" + 23 = "Maine" + 24 = "Maryland" + 25 = "Massachusetts" + 26 = "Michigan" + 27 = "Minnesota" + 28 = "Mississippi" + 29 = "Missouri" + 30 = "Montana" + 31 = "Nebraska" + 32 = "Nevada" + 33 = "New Hampshire" + 34 = "New Jersey" + 35 = "New Mexico" + 36 = "New York" + 37 = "North Carolina" + 38 = "North Dakota" + 39 = "Ohio" + 40 = "Oklahoma" + 41 = "Oregon" + 42 = "Pennsylvania" + 44 = "Rhode Island" + 45 = "South Carolina" + 46 = "South Dakota" + 47 = "Tennessee" + 48 = "Texas" + 49 = "Utah" + 50 = "Vermont" + 51 = "Virginia" + 53 = "Washington" + 54 = "West Virginia" + 55 = "Wisconsin" + 56 = "Wyoming" + 61 = "Maine-New Hampshire-Vermont" + 62 = "Massachusetts-Rhode Island" + 63 = "Minnesota-Iowa-Missouri-Kansas-Nebraska-S.Dakota-N.Dakota" + 64 = "Maryland-Delaware" + 65 = "Montana-Idaho-Wyoming" + 66 = "Utah-Nevada" + 67 = "Arizona-New Mexico" + 68 = "Alaska-Hawaii" + 72 = "Puerto Rico" + 97 = "Military/Mil.
Reservation" + 99 = "State not identified" + ; + + value GQ_f + 0 = "Vacant unit" + 1 = "Households under 1970 definition" + 2 = "Additional households under 1990 definition" + 3 = "Group quarters--Institutions" + 4 = "Other group quarters" + 5 = "Additional households under 2000 definition" + 6 = "Fragment" + ; + + value $ US2021C_HHT_f + "B" = "N/A (GQ/vacant)" + "1" = "Married couple household" + "2" = "Other family household: Male householder, no spouse present" + "3" = "Other family household: Female householder, no spouse present" + "4" = "Nonfamily household: Male householder: Living alone" + "5" = "Nonfamily household: Male householder: Not living alone" + "6" = "Nonfamily household: Female householder: Living alone" + "7" = "Nonfamily household: Female householder: Not living alone" + ; + + value REPWTP_f + 0 = "Repwtp not available" + 1 = "Repwtp available" + ; + + /* NOTE: killed US2021C_ADJINCP_f format */ + + /* NOTE: killed US2021C_AGEP_f format */ + + value $ US2021C_FER_f + "B" = "N/A (less than 15 years/greater than 50 years/ male)" + "1" = "Yes" + "2" = "No" + ; + + /* NOTE: killed US2021C_JWMNP_f format */ + + /* NOTE: killed US2021C_JWRIP_f format */ + + value $ US2021C_JWTRNS_f + "BB" = "N/A (not a worker-not in the labor force, including persons under 16 years; unemployed; employed, wi" + "th a job but not at work; Armed Forces, with a job but not at work)" + "01" = "Car, truck, or van" + "02" = "Bus" + "03" = "Subway or elevated rail" + "04" = "Long-distance train or commuter rail" + "05" = "Light rail, streetcar, or trolley" + "06" = "Ferryboat" + "07" = "Taxicab" + "08" = "Motorcycle" + "09" = "Bicycle" + "10" = "Walked" + "11" = "Worked from home" + "12" = "Other method" + ; /* NOTE(review): the "BB" label above is split across two adjacent quoted strings; confirm the stored label is complete */ + + value $ US2021C_LANX_f + "B" = "N/A (less than 5 years old)" + "1" = "Yes, speaks another language" + "2" = "No, speaks only English" + ; + + value $ US2021C_MAR_f + "1" = "Married" + "2" = "Widowed" + "3" = "Divorced" + "4" = "Separated" + "5" = "Never married or
under 15 years old" + ; + + value $ US2021C_MIG_f + "B" = "N/A (less than 1 year old)" + "1" = "Yes, same house (nonmovers)" + "2" = "No, outside US and Puerto Rico" + "3" = "No, different house in US or Puerto Rico" + ; + + value $ US2021C_MLPA_f + "B" = "N/A (less than 17 years old/no active duty)" + "0" = "Did not serve this period" + "1" = "Served this period" + ; + + value $ US2021C_MLPB_f + "B" = "N/A (less than 17 years old/no active duty)" + "0" = "Did not serve this period" + "1" = "Served this period" + ; + + value $ US2021C_MLPCD_f + "B" = "N/A (less than 17 years old/no active duty)" + "0" = "Did not serve this period" + "1" = "Served this period" + ; + + value $ US2021C_MLPE_f + "B" = "N/A (less than 17 years old/no active duty)" + "0" = "Did not serve this period" + "1" = "Served this period" + ; + + value $ US2021C_MLPFG_f + "B" = "N/A (less than 17 years old/no active duty)" + "0" = "Did not serve this period" + "1" = "Served this period" + ; + + value $ US2021C_MLPH_f + "B" = "N/A (less than 17 years old/no active duty)" + "0" = "Did not serve this period" + "1" = "Served this period" + ; + + value $ US2021C_MLPJ_f + "B" = "N/A (less than 17 years old/no active duty)" + "0" = "Did not serve this period" + "1" = "Served this period" + ; + + value $ US2021C_SCHL_f + "BB" = "N/A (less than 3 years old)" + "01" = "No schooling completed" + "02" = "Nursery school, preschool" + "03" = "Kindergarten" + "04" = "Grade 1" + "05" = "Grade 2" + "06" = "Grade 3" + "07" = "Grade 4" + "08" = "Grade 5" + "09" = "Grade 6" + "10" = "Grade 7" + "11" = "Grade 8" + "12" = "Grade 9" + "13" = "Grade 10" + "14" = "Grade 11" + "15" = "12th grade - no diploma" + "16" = "Regular high school diploma" + "17" = "GED or alternative credential" + "18" = "Some college, but less than 1 year" + "19" = "1 or more years of college credit, no degree" + "20" = "Associate's degree" + "21" = "Bachelor's degree" + "22" = "Master's degree" + "23" = "Professional degree beyond a bachelor's
degree" + "24" = "Doctorate degree" + ; + + value $ US2021C_SEX_f + "1" = "Male" + "2" = "Female" + ; + + value $ US2021C_HISP_f + "01" = "Not Spanish/Hispanic/Latino" + "02" = "Mexican" + "03" = "Puerto Rican" + "04" = "Cuban" + "05" = "Dominican" + "06" = "Costa Rican" + "07" = "Guatemalan" + "08" = "Honduran" + "09" = "Nicaraguan" + "10" = "Panamanian" + "11" = "Salvadoran" + "12" = "Other Central American" + "13" = "Argentinean" + "14" = "Bolivian" + "15" = "Chilean" + "16" = "Colombian" + "17" = "Ecuadorian" + "18" = "Paraguayan" + "19" = "Peruvian" + "20" = "Uruguayan" + "21" = "Venezuelan" + "22" = "Other South American" + "23" = "Spaniard" + "24" = "All Other Spanish/Hispanic/Latino" + ; + + value $ US2021C_LANP_f + "BBBB" = "N/A (GQ/vacant)" + "1000" = "Jamaican Creole English" + "1025" = "Other English-based Creole languages" + "1055" = "Haitian" + "1069" = "Kabuverdianu" + "1110" = "German" + "1120" = "Swiss German" + "1125" = "Pennsylvania German" + "1130" = "Yiddish" + "1132" = "Dutch" + "1134" = "Afrikaans" + "1140" = "Swedish" + "1141" = "Danish" + "1142" = "Norwegian" + "1155" = "Italian" + "1170" = "French" + "1175" = "Cajun French" + "1200" = "Spanish" + "1210" = "Portuguese" + "1220" = "Romanian" + "1231" = "Irish" + "1235" = "Greek" + "1242" = "Albanian" + "1250" = "Russian" + "1260" = "Ukrainian" + "1262" = "Czech" + "1263" = "Slovak" + "1270" = "Polish" + "1273" = "Bulgarian" + "1274" = "Macedonian" + "1275" = "Serbocroatian" + "1276" = "Bosnian" + "1277" = "Croatian" + "1278" = "Serbian" + "1281" = "Lithuanian" + "1283" = "Latvian" + "1288" = "Armenian" + "1290" = "Farsi" + "1292" = "Dari" + "1315" = "Kurdish" + "1327" = "Pashto" + "1340" = "India N.E.C." + "1350" = "Hindi" + "1360" = "Urdu" + "1380" = "Bengali" + "1420" = "Punjabi" + "1435" = "Konkani" + "1440" = "Marathi" + "1450" = "Gujarati" + "1500" = "Nepali" + "1525" = "Pakistan N.E.C."
+ "1530" = "Sinhala" + "1540" = "Other Indo-Iranian languages" + "1564" = "Other Indo-European languages" + "1565" = "Finnish" + "1582" = "Hungarian" + "1675" = "Turkish" + "1690" = "Mongolian" + "1730" = "Telugu" + "1737" = "Kannada" + "1750" = "Malayalam" + "1765" = "Tamil" + "1900" = "Khmer" + "1960" = "Vietnamese" + "1970" = "Chinese" + "2000" = "Mandarin" + "2030" = "Min Nan Chinese" + "2050" = "Cantonese" + "2100" = "Tibetan" + "2160" = "Burmese" + "2270" = "Chin languages" + "2350" = "Karen languages" + "2430" = "Thai" + "2475" = "Lao" + "2525" = "Iu Mien" + "2535" = "Hmong" + "2560" = "Japanese" + "2575" = "Korean" + "2715" = "Malay" + "2770" = "Indonesian" + "2850" = "Other languages of Asia" + "2910" = "Filipino" + "2920" = "Tagalog" + "2950" = "Cebuano" + "3150" = "Ilocano" + "3190" = "Other Philippine languages" + "3220" = "Chamorro" + "3270" = "Marshallese" + "3350" = "Chuukese" + "3420" = "Samoan" + "3500" = "Tongan" + "3570" = "Hawaiian" + "3600" = "Other Eastern Malayo-Polynesian languages" + "4500" = "Arabic" + "4545" = "Hebrew" + "4560" = "Assyrian Neo-Aramaic" + "4565" = "Chaldean Neo-Aramaic" + "4590" = "Amharic" + "4640" = "Tigrinya" + "4830" = "Oromo" + "4840" = "Somali" + "4880" = "Other Afro-Asiatic languages" + "4900" = "Nilo-Saharan languages" + "5150" = "Swahili" + "5345" = "Ganda" + "5525" = "Shona" + "5645" = "Other Bantu languages" + "5845" = "Manding languages" + "5900" = "Other Mande languages" + "5940" = "Fulah" + "5950" = "Wolof" + "6120" = "Akan (incl.
Twi)" + "6205" = "Ga" + "6230" = "Gbe languages" + "6290" = "Yoruba" + "6300" = "Edoid languages" + "6370" = "Igbo" + "6500" = "Other Niger-Congo languages" + "6795" = "Other languages of Africa" + "6800" = "Aleut languages" + "6839" = "Ojibwa" + "6930" = "Apache languages" + "6933" = "Navajo" + "7019" = "Dakota languages" + "7032" = "Muskogean languages" + "7039" = "Keres" + "7050" = "Cherokee" + "7060" = "Uto-Aztecan languages" + "7124" = "Other Native North American languages" + "7300" = "Other Central and South American languages" + "9999" = "Other and unspecified languages" + ; + + value $ US2021C_MIGSP_f + "BBB" = "N/A (person less than 1 year old/lived in same house 1 year ago)" + "001" = "Alabama/AL" + "002" = "Alaska/AK" + "004" = "Arizona/AZ" + "005" = "Arkansas/AR" + "006" = "California/CA" + "008" = "Colorado/CO" + "009" = "Connecticut/CT" + "010" = "Delaware/DE" + "011" = "District of Columbia/DC" + "012" = "Florida/FL" + "013" = "Georgia/GA" + "015" = "Hawaii/HI" + "016" = "Idaho/ID" + "017" = "Illinois/IL" + "018" = "Indiana/IN" + "019" = "Iowa/IA" + "020" = "Kansas/KS" + "021" = "Kentucky/KY" + "022" = "Louisiana/LA" + "023" = "Maine/ME" + "024" = "Maryland/MD" + "025" = "Massachusetts/MA" + "026" = "Michigan/MI" + "027" = "Minnesota/MN" + "028" = "Mississippi/MS" + "029" = "Missouri/MO" + "030" = "Montana/MT" + "031" = "Nebraska/NE" + "032" = "Nevada/NV" + "033" = "New Hampshire/NH" + "034" = "New Jersey/NJ" + "035" = "New Mexico/NM" + "036" = "New York/NY" + "037" = "North Carolina/NC" + "038" = "North Dakota/ND" + "039" = "Ohio/OH" + "040" = "Oklahoma/OK" + "041" = "Oregon/OR" + "042" = "Pennsylvania/PA" + "044" = "Rhode Island/RI" + "045" = "South Carolina/SC" + "046" = "South Dakota/SD" + "047" = "Tennessee/TN" + "048" = "Texas/TX" + "049" = "Utah/UT" + "050" = "Vermont/VT" + "051" = "Virginia/VA" + "053" = "Washington/WA" + "054" = "West Virginia/WV" + "055" = "Wisconsin/WI" + "056" = "Wyoming/WY" + "072" = "Puerto Rico" + "109" = "France" +
"110" = "Germany" + "111" = "Northern Europe, Not Specified" + "113" = "Eastern Europe, Not Specified" + "114" = "Western Europe or Other Europe, Not Specified" + "120" = "Italy" + "134" = "Spain" + "138" = "United Kingdom, Excluding England" + "139" = "England" + "163" = "Russia" + "164" = "Ukraine" + "200" = "Afghanistan" + "207" = "China, Hong Kong, Macau And Paracel Islands" + "210" = "India" + "214" = "Israel" + "215" = "Japan" + "217" = "Korea" + "229" = "Nepal" + "231" = "Pakistan" + "233" = "Philippines" + "235" = "Saudi Arabia" + "240" = "Taiwan" + "242" = "Thailand" + "243" = "Turkey" + "245" = "United Arab Emirates" + "247" = "Vietnam" + "251" = "Eastern Asia, Not Specified" + "252" = "Western Asia, Not Specified" + "253" = "South Central Asia or Asia, Not Specified" + "301" = "Canada" + "303" = "Mexico" + "312" = "El Salvador" + "313" = "Guatemala" + "314" = "Honduras" + "317" = "Central America, Not Specified" + "327" = "Cuba" + "329" = "Dominican Republic" + "332" = "Haiti" + "333" = "Jamaica" + "344" = "Caribbean and North America, Not Specified" + "362" = "Brazil" + "364" = "Colombia" + "365" = "Ecuador" + "370" = "Peru" + "373" = "Venezuela" + "374" = "South America, Not Specified" + "414" = "Egypt" + "416" = "Ethiopia" + "427" = "Kenya" + "440" = "Nigeria" + "467" = "Western Africa, Not Specified" + "468" = "Other Africa, Not Specified" + "469" = "Eastern Africa, Not Specified" + "501" = "Australia" + "555" = "Other US Island Areas, Oceania, Not Specified, or At Sea" + ; + + value $ US2021C_NATIVITY_f + "1" = "Native" + "2" = "Foreign born" + ; + + value $ US2021C_POBP_f + "001" = "Alabama/AL" + "002" = "Alaska/AK" + "004" = "Arizona/AZ" + "005" = "Arkansas/AR" + "006" = "California/CA" + "008" = "Colorado/CO" + "009" = "Connecticut/CT" + "010" = "Delaware/DE" + "011" = "District of Columbia/DC" + "012" = "Florida/FL" + "013" = "Georgia/GA" + "015" = "Hawaii/HI" + "016" = "Idaho/ID" + "017" = "Illinois/IL" + "018" = "Indiana/IN" + "019" =
"Iowa/IA" + "020" = "Kansas/KS" + "021" = "Kentucky/KY" + "022" = "Louisiana/LA" + "023" = "Maine/ME" + "024" = "Maryland/MD" + "025" = "Massachusetts/MA" + "026" = "Michigan/MI" + "027" = "Minnesota/MN" + "028" = "Mississippi/MS" + "029" = "Missouri/MO" + "030" = "Montana/MT" + "031" = "Nebraska/NE" + "032" = "Nevada/NV" + "033" = "New Hampshire/NH" + "034" = "New Jersey/NJ" + "035" = "New Mexico/NM" + "036" = "New York/NY" + "037" = "North Carolina/NC" + "038" = "North Dakota/ND" + "039" = "Ohio/OH" + "040" = "Oklahoma/OK" + "041" = "Oregon/OR" + "042" = "Pennsylvania/PA" + "044" = "Rhode Island/RI" + "045" = "South Carolina/SC" + "046" = "South Dakota/SD" + "047" = "Tennessee/TN" + "048" = "Texas/TX" + "049" = "Utah/UT" + "050" = "Vermont/VT" + "051" = "Virginia/VA" + "053" = "Washington/WA" + "054" = "West Virginia/WV" + "055" = "Wisconsin/WI" + "056" = "Wyoming/WY" + "060" = "American Samoa" + "066" = "Guam" + "069" = "Commonwealth of the Northern Mariana Islands" + "072" = "Puerto Rico" + "078" = "US Virgin Islands" + "100" = "Albania" + "102" = "Austria" + "103" = "Belgium" + "104" = "Bulgaria" + "105" = "Czechoslovakia" + "106" = "Denmark" + "108" = "Finland" + "109" = "France" + "110" = "Germany" + "116" = "Greece" + "117" = "Hungary" + "118" = "Iceland" + "119" = "Ireland" + "120" = "Italy" + "126" = "Netherlands" + "127" = "Norway" + "128" = "Poland" + "129" = "Portugal" + "130" = "Azores Islands" + "132" = "Romania" + "134" = "Spain" + "136" = "Sweden" + "137" = "Switzerland" + "138" = "United Kingdom, Not Specified" + "139" = "England" + "140" = "Scotland" + "142" = "Northern Ireland" + "147" = "Yugoslavia" + "148" = "Czech Republic" + "149" = "Slovakia" + "150" = "Bosnia and Herzegovina" + "151" = "Croatia" + "152" = "Macedonia" + "154" = "Serbia" + "156" = "Latvia" + "157" = "Lithuania" + "158" = "Armenia" + "159" = "Azerbaijan" + "160" = "Belarus" + "161" = "Georgia" + "162" = "Moldova" + "163" = "Russia" + "164" = "Ukraine" + "165" = "USSR" + "166"
= "Europe" + "167" = "Kosovo" + "168" = "Montenegro" + "169" = "Other Europe, Not Specified" + "200" = "Afghanistan" + "202" = "Bangladesh" + "203" = "Bhutan" + "205" = "Myanmar" + "206" = "Cambodia" + "207" = "China" + "209" = "Hong Kong" + "210" = "India" + "211" = "Indonesia" + "212" = "Iran" + "213" = "Iraq" + "214" = "Israel" + "215" = "Japan" + "216" = "Jordan" + "217" = "Korea" + "218" = "Kazakhstan" + "219" = "Kyrgyzstan" + "222" = "Kuwait" + "223" = "Laos" + "224" = "Lebanon" + "226" = "Malaysia" + "228" = "Mongolia" + "229" = "Nepal" + "231" = "Pakistan" + "233" = "Philippines" + "235" = "Saudi Arabia" + "236" = "Singapore" + "238" = "Sri Lanka" + "239" = "Syria" + "240" = "Taiwan" + "242" = "Thailand" + "243" = "Turkey" + "245" = "United Arab Emirates" + "246" = "Uzbekistan" + "247" = "Vietnam" + "248" = "Yemen" + "249" = "Asia" + "253" = "South Central Asia, Not Specified" + "254" = "Other Asia, Not Specified" + "300" = "Bermuda" + "301" = "Canada" + "303" = "Mexico" + "310" = "Belize" + "311" = "Costa Rica" + "312" = "El Salvador" + "313" = "Guatemala" + "314" = "Honduras" + "315" = "Nicaragua" + "316" = "Panama" + "321" = "Antigua and Barbuda" + "323" = "Bahamas" + "324" = "Barbados" + "327" = "Cuba" + "328" = "Dominica" + "329" = "Dominican Republic" + "330" = "Grenada" + "332" = "Haiti" + "333" = "Jamaica" + "338" = "St. Kitts-Nevis" + "339" = "St. Lucia" + "340" = "St.
Vincent and the Grenadines" + "341" = "Trinidad and Tobago" + "343" = "West Indies" + "344" = "Caribbean, Not Specified" + "360" = "Argentina" + "361" = "Bolivia" + "362" = "Brazil" + "363" = "Chile" + "364" = "Colombia" + "365" = "Ecuador" + "368" = "Guyana" + "369" = "Paraguay" + "370" = "Peru" + "372" = "Uruguay" + "373" = "Venezuela" + "374" = "South America" + "399" = "Americas, Not Specified" + "400" = "Algeria" + "407" = "Cameroon" + "408" = "Cabo Verde" + "412" = "Congo" + "414" = "Egypt" + "416" = "Ethiopia" + "417" = "Eritrea" + "420" = "Gambia" + "421" = "Ghana" + "423" = "Guinea" + "425" = "Ivory Coast" + "427" = "Kenya" + "429" = "Liberia" + "430" = "Libya" + "436" = "Morocco" + "440" = "Nigeria" + "442" = "Rwanda" + "444" = "Senegal" + "447" = "Sierra Leone" + "448" = "Somalia" + "449" = "South Africa" + "451" = "Sudan" + "453" = "Tanzania" + "454" = "Togo" + "456" = "Tunisia" + "457" = "Uganda" + "459" = "Democratic Republic of Congo (Zaire)" + "460" = "Zambia" + "461" = "Zimbabwe" + "462" = "Africa" + "463" = "South Sudan" + "464" = "Northern Africa, Not Specified" + "467" = "Western Africa, Not Specified" + "468" = "Other Africa, Not Specified" + "469" = "Eastern Africa, Not Specified" + "501" = "Australia" + "508" = "Fiji" + "511" = "Marshall Islands" + "512" = "Micronesia" + "515" = "New Zealand" + "523" = "Tonga" + "527" = "Samoa" + "554" = "Other US Island Areas, Oceania, Not Specified, or at Sea" + ; + + value $ US2021C_RAC1P_f + "1" = "White alone" + "2" = "Black or African American alone" + "3" = "American Indian alone" + "4" = "Alaska Native alone" + "5" = "American Indian and Alaska Native tribes specified; or American Indian or Alaska Native, not specifi" + "ed and no other races" + "6" = "Asian alone" + "7" = "Native Hawaiian and Other Pacific Islander alone" + "8" = "Some Other Race alone" + "9" = "Two or More Races" + ; /* NOTE(review): the "5" label above is split across two adjacent quoted strings; confirm the stored label is complete */ + +run; + +data IPUMS.raw_p; + infile dat_p pad missover lrecl=633; + + input + YEAR 1-4 + MULTYEAR 5-8 + SAMPLE 9-14
+ SERIAL 15-22 + CBSERIAL 23-35 + HHWT 36-45 .2 + CLUSTER 46-58 + STATEFIP 59-60 + PUMA 61-65 + STRATA 66-77 + GQ 78-78 + US2021C_HHT $ 79-79 + PERNUM 80-83 + PERWT 84-93 .2 + REPWTP 94-94 + REPWTP1 95-100 + REPWTP2 101-106 + REPWTP3 107-112 + REPWTP4 113-118 + REPWTP5 119-124 + REPWTP6 125-130 + REPWTP7 131-136 + REPWTP8 137-142 + REPWTP9 143-148 + REPWTP10 149-154 + REPWTP11 155-160 + REPWTP12 161-166 + REPWTP13 167-172 + REPWTP14 173-178 + REPWTP15 179-184 + REPWTP16 185-190 + REPWTP17 191-196 + REPWTP18 197-202 + REPWTP19 203-208 + REPWTP20 209-214 + REPWTP21 215-220 + REPWTP22 221-226 + REPWTP23 227-232 + REPWTP24 233-238 + REPWTP25 239-244 + REPWTP26 245-250 + REPWTP27 251-256 + REPWTP28 257-262 + REPWTP29 263-268 + REPWTP30 269-274 + REPWTP31 275-280 + REPWTP32 281-286 + REPWTP33 287-292 + REPWTP34 293-298 + REPWTP35 299-304 + REPWTP36 305-310 + REPWTP37 311-316 + REPWTP38 317-322 + REPWTP39 323-328 + REPWTP40 329-334 + REPWTP41 335-340 + REPWTP42 341-346 + REPWTP43 347-352 + REPWTP44 353-358 + REPWTP45 359-364 + REPWTP46 365-370 + REPWTP47 371-376 + REPWTP48 377-382 + REPWTP49 383-388 + REPWTP50 389-394 + REPWTP51 395-400 + REPWTP52 401-406 + REPWTP53 407-412 + REPWTP54 413-418 + REPWTP55 419-424 + REPWTP56 425-430 + REPWTP57 431-436 + REPWTP58 437-442 + REPWTP59 443-448 + REPWTP60 449-454 + REPWTP61 455-460 + REPWTP62 461-466 + REPWTP63 467-472 + REPWTP64 473-478 + REPWTP65 479-484 + REPWTP66 485-490 + REPWTP67 491-496 + REPWTP68 497-502 + REPWTP69 503-508 + REPWTP70 509-514 + REPWTP71 515-520 + REPWTP72 521-526 + REPWTP73 527-532 + REPWTP74 533-538 + REPWTP75 539-544 + REPWTP76 545-550 + REPWTP77 551-556 + REPWTP78 557-562 + REPWTP79 563-568 + REPWTP80 569-574 + US2021C_ADJINCP 575-581 .6 /* NOTE: changed informat from string to 8.6 numeric */ + US2021C_AGEP $ 582-583 + US2021C_FER $ 584-584 + US2021C_JWMNP $ 585-587 + US2021C_JWRIP $ 588-589 + US2021C_JWTRNS $ 590-591 + US2021C_LANX $ 592-592 + US2021C_MAR $ 593-593 + US2021C_MIG $ 594-594 + US2021C_MLPA 
$ 595-595 + US2021C_MLPB $ 596-596 + US2021C_MLPCD $ 597-597 + US2021C_MLPE $ 598-598 + US2021C_MLPFG $ 599-599 + US2021C_MLPH $ 600-600 + US2021C_MLPJ $ 601-601 + US2021C_SCHL $ 602-603 + US2021C_SEX $ 604-604 + US2021C_HISP $ 605-606 + US2021C_LANP $ 607-610 + US2021C_MIGPUMA $ 611-615 + US2021C_MIGSP $ 616-618 + US2021C_NATIVITY $ 619-619 + US2021C_PINCP $ 620-626 + US2021C_POBP $ 627-629 + US2021C_POVPIP $ 630-632 + US2021C_RAC1P $ 633-633 + ; + + label + YEAR = "Census year" + MULTYEAR = "Actual year of survey, multi-year ACS/PRCS" + SAMPLE = "IPUMS sample identifier" + SERIAL = "Household serial number" + CBSERIAL = "Original Census Bureau household serial number" + HHWT = "Household weight" + CLUSTER = "Household cluster for variance estimation" + STATEFIP = "State (FIPS code)" + PUMA = "Public Use Microdata Area" + STRATA = "Household strata for variance estimation" + GQ = "Group quarters status" + US2021C_HHT = "Household/family type" + PERNUM = "Person number in sample unit" + PERWT = "Person weight" + REPWTP = "Person replicate weights [80 variables]" + REPWTP1 = "Person replicate weight 1" + REPWTP2 = "Person replicate weight 2" + REPWTP3 = "Person replicate weight 3" + REPWTP4 = "Person replicate weight 4" + REPWTP5 = "Person replicate weight 5" + REPWTP6 = "Person replicate weight 6" + REPWTP7 = "Person replicate weight 7" + REPWTP8 = "Person replicate weight 8" + REPWTP9 = "Person replicate weight 9" + REPWTP10 = "Person replicate weight 10" + REPWTP11 = "Person replicate weight 11" + REPWTP12 = "Person replicate weight 12" + REPWTP13 = "Person replicate weight 13" + REPWTP14 = "Person replicate weight 14" + REPWTP15 = "Person replicate weight 15" + REPWTP16 = "Person replicate weight 16" + REPWTP17 = "Person replicate weight 17" + REPWTP18 = "Person replicate weight 18" + REPWTP19 = "Person replicate weight 19" + REPWTP20 = "Person replicate weight 20" + REPWTP21 = "Person replicate weight 21" + REPWTP22 = "Person replicate weight 22" + REPWTP23 = 
"Person replicate weight 23" + REPWTP24 = "Person replicate weight 24" + REPWTP25 = "Person replicate weight 25" + REPWTP26 = "Person replicate weight 26" + REPWTP27 = "Person replicate weight 27" + REPWTP28 = "Person replicate weight 28" + REPWTP29 = "Person replicate weight 29" + REPWTP30 = "Person replicate weight 30" + REPWTP31 = "Person replicate weight 31" + REPWTP32 = "Person replicate weight 32" + REPWTP33 = "Person replicate weight 33" + REPWTP34 = "Person replicate weight 34" + REPWTP35 = "Person replicate weight 35" + REPWTP36 = "Person replicate weight 36" + REPWTP37 = "Person replicate weight 37" + REPWTP38 = "Person replicate weight 38" + REPWTP39 = "Person replicate weight 39" + REPWTP40 = "Person replicate weight 40" + REPWTP41 = "Person replicate weight 41" + REPWTP42 = "Person replicate weight 42" + REPWTP43 = "Person replicate weight 43" + REPWTP44 = "Person replicate weight 44" + REPWTP45 = "Person replicate weight 45" + REPWTP46 = "Person replicate weight 46" + REPWTP47 = "Person replicate weight 47" + REPWTP48 = "Person replicate weight 48" + REPWTP49 = "Person replicate weight 49" + REPWTP50 = "Person replicate weight 50" + REPWTP51 = "Person replicate weight 51" + REPWTP52 = "Person replicate weight 52" + REPWTP53 = "Person replicate weight 53" + REPWTP54 = "Person replicate weight 54" + REPWTP55 = "Person replicate weight 55" + REPWTP56 = "Person replicate weight 56" + REPWTP57 = "Person replicate weight 57" + REPWTP58 = "Person replicate weight 58" + REPWTP59 = "Person replicate weight 59" + REPWTP60 = "Person replicate weight 60" + REPWTP61 = "Person replicate weight 61" + REPWTP62 = "Person replicate weight 62" + REPWTP63 = "Person replicate weight 63" + REPWTP64 = "Person replicate weight 64" + REPWTP65 = "Person replicate weight 65" + REPWTP66 = "Person replicate weight 66" + REPWTP67 = "Person replicate weight 67" + REPWTP68 = "Person replicate weight 68" + REPWTP69 = "Person replicate weight 69" + REPWTP70 = "Person replicate weight 
70" + REPWTP71 = "Person replicate weight 71" + REPWTP72 = "Person replicate weight 72" + REPWTP73 = "Person replicate weight 73" + REPWTP74 = "Person replicate weight 74" + REPWTP75 = "Person replicate weight 75" + REPWTP76 = "Person replicate weight 76" + REPWTP77 = "Person replicate weight 77" + REPWTP78 = "Person replicate weight 78" + REPWTP79 = "Person replicate weight 79" + REPWTP80 = "Person replicate weight 80" + US2021C_ADJINCP = "Adjustment factor for income and earnings dollar amounts (6 implied decimal places)" + US2021C_AGEP = "Age" + US2021C_FER = "Gave birth to child within the past 12 months" + US2021C_JWMNP = "Travel time to work" + US2021C_JWRIP = "Vehicle occupancy" + US2021C_JWTRNS = "Means of transportation to work" + US2021C_LANX = "Language other than English spoken at home" + US2021C_MAR = "Marital status" + US2021C_MIG = "Mobility status (lived here 1 year ago)" + US2021C_MLPA = "Served September 2001 or later" + US2021C_MLPB = "Served August 1990 - August 2001 (including Persian Gulf War)" + US2021C_MLPCD = "Served May 1975 - July 1990" + US2021C_MLPE = "Served Vietnam era (August 1964 - April 1975)" + US2021C_MLPFG = "Served February 1955 - July 1964" + US2021C_MLPH = "Served Korean War (July 1950 - January 1955)" + US2021C_MLPJ = "Served World War II (December 1941 - December 1946)" + US2021C_SCHL = "Educational attainment" + US2021C_SEX = "Sex" + US2021C_HISP = "Recoded detailed Hispanic origin" + US2021C_LANP = "Language spoken at home" + US2021C_MIGPUMA = "Migration PUMA based on 2010 Census definition" + US2021C_MIGSP = "Migration recode - State or foreign country code" + US2021C_NATIVITY = "Nativity" + US2021C_PINCP = "Total person's income (signed, use ADJINC to adjust to constant dollars)" + US2021C_POBP = "Place of birth (Recode)" + US2021C_POVPIP = "Income-to-poverty ratio recode" + US2021C_RAC1P = "Recoded detailed race code" + ; + + format + STATEFIP STATEFIP_f. + GQ GQ_f. + US2021C_HHT US2021C_HHT_f. + REPWTP REPWTP_f. 
+        US2021C_FER US2021C_FER_f.
+        US2021C_JWTRNS US2021C_JWTRNS_f.
+        US2021C_LANX US2021C_LANX_f.
+        US2021C_MAR US2021C_MAR_f.
+        US2021C_MIG US2021C_MIG_f.
+        US2021C_MLPA US2021C_MLPA_f.
+        US2021C_MLPB US2021C_MLPB_f.
+        US2021C_MLPCD US2021C_MLPCD_f.
+        US2021C_MLPE US2021C_MLPE_f.
+        US2021C_MLPFG US2021C_MLPFG_f.
+        US2021C_MLPH US2021C_MLPH_f.
+        US2021C_MLPJ US2021C_MLPJ_f.
+        US2021C_SCHL US2021C_SCHL_f.
+        US2021C_SEX US2021C_SEX_f.
+        US2021C_HISP US2021C_HISP_f.
+        US2021C_LANP US2021C_LANP_f.
+        US2021C_MIGSP US2021C_MIGSP_f.
+        US2021C_NATIVITY US2021C_NATIVITY_f.
+        US2021C_POBP US2021C_POBP_f.
+        US2021C_RAC1P US2021C_RAC1P_f.
+    ;
+
+    format
+        CBSERIAL 13.
+        HHWT 11.2
+        CLUSTER 13.
+        STRATA 12.
+        PERWT 11.2
+    ;
+
+run;
diff --git a/src/02_Prepare.sas b/src/02_Prepare.sas
new file mode 100644
index 0000000..de143fd
--- /dev/null
+++ b/src/02_Prepare.sas
@@ -0,0 +1,842 @@
+libname IPUMS "/home/u44593168";
+
+data IPUMS.houses;
+    set IPUMS.raw_h;
+
+    /*
+    VACANT
+    1: Occupied
+    2: Vacant
+
+    Recoded from VACS
+    1 (For rent) -> 2
+    2 (Rented, not occupied) -> 2
+    3 (For sale only) -> 2
+    4 (Sold, not occupied) -> 2
+    5 (For seasonal/recreational/occasional use) -> 2
+    6 (For migrant workers) -> 2
+    7 (Other vacant) -> 2
+    Also recoded from TEN
+    1 (Owned with mortgage or loan (include home equity loans)) -> 1
+    2 (Owned free and clear) -> 1
+    3 (Rented) -> 1
+    4 (Occupied without payment of rent) -> 1
+    */
+    if US2021C_VACS in ("1" "2" "3" "4" "5" "6" "7") then VACANT=2;
+    else if US2021C_TEN in ("1" "2" "3" "4") then VACANT=1;
+
+    /*
+    NP_NUM
+    Numeric NP if occupied
+    */
+    if US2021C_NP in ("B0" "B1" "B2" "B3" "B4" "B5" "B6" "B7" "B8" "B9") then NP_NUM=.;
+    else if VACANT in (. 2) then NP_NUM=.;
+    else NP_NUM=input(US2021C_NP, 2.);
+
+    /*
+    OWN
+    Ownership of an occupied housing unit
+    1: Owned
+    2: Rented
+
+    Recoded from TEN
+    1 (Owned with mortgage or loan (include home equity loans)) -> 1
+    2 (Owned free and clear) -> 1
+    3 (Rented) -> 2
+    4 (Occupied without payment of rent) -> 2
+    */
+    if US2021C_TEN in ("B") then OWN=.;
+    else if US2021C_TEN in ("1" "2") then OWN=1;
+    else if US2021C_TEN in ("3" "4") then OWN=2;
+
+    /*
+    BLD_CAT
+    housing units category
+    1: Single unit
+    2: Multi-unit
+    3: Mobile home
+    4: Boat, RV, van, etc.
+
+    Recoded from BLD
+    1 (Mobile home or trailer) -> 3
+    2 (One-family house detached) -> 1
+    3 (One-family house attached) -> 1
+    4 (2 Apartments) -> 2
+    5 (3-4 Apartments) -> 2
+    6 (5-9 Apartments) -> 2
+    7 (10-19 Apartments) -> 2
+    8 (20-49 Apartments) -> 2
+    9 (50 or more apartments) -> 2
+    10 (Boat, RV, van, etc.) -> 4
+    */
+    if US2021C_BLD="BB" then BLD_CAT=.;
+    else if US2021C_BLD="01" then BLD_CAT=3;
+    else if US2021C_BLD in ("02" "03") then BLD_CAT=1;
+    else if US2021C_BLD in ("04" "05" "06" "07" "08" "09") then BLD_CAT=2;
+    else if US2021C_BLD="10" then BLD_CAT=4;
+
+    /*
+    year moved in category (not implemented: no code below derives this variable)
+    1: Before 1990
+    2: 1990s
+    3: 2000s
+    4: 2010-2014
+    5: 2015-2016
+    6: Since 2017
+    */
+
+    /*
+    VALP_NUM
+    Numeric VALP
+    */
+    if US2021C_VALP="BBBBBBB" then VALP_NUM=.;
+    else VALP_NUM=input(US2021C_VALP, 7.);
+
+    /*
+    VAL
+    Property value adjusted for current year
+    */
+    if VALP_NUM=. then VAL=.;
+    else if US2021C_ADJHSG in (. 0) then VAL=.;
+    else VAL=VALP_NUM*US2021C_ADJHSG;
+
+    /*
+    VAL_OWN
+    Property value of a housing unit if owner occupied, less than 10 acres, and not a mobile home (OWN is 1/2, so renters are excluded with OWN=2)
+    */
+    if VAL=. then VAL_OWN=.;
+    else if OWN in (. 2) then VAL_OWN=.;
+    else if US2021C_SVAL in ("B" "0") then VAL_OWN=.;
+    else VAL_OWN=VAL;
+
+    /*
+    VAL_OWN_CAT
+    Categories of property value if owner occupied, less than 10 acres, and not a mobile home (bins are half-open because VAL is an adjusted, possibly fractional amount)
+    1: Up to $100k
+    2: $100k-$200k
+    3: $200k-$300k
+    4: $300k-$400k
+    5: $400k-$500k
+    6: $500k-$1m
+    7: $1m+
+    */
+    if VAL=. then VAL_OWN_CAT=.;
+    else if OWN in (. 2) then VAL_OWN_CAT=.;
+    else if US2021C_SVAL in ("B" "0") then VAL_OWN_CAT=.;
+    else if VAL_OWN<100000 then VAL_OWN_CAT=1;
+    else if VAL_OWN<200000 then VAL_OWN_CAT=2;
+    else if VAL_OWN<300000 then VAL_OWN_CAT=3;
+    else if VAL_OWN<400000 then VAL_OWN_CAT=4;
+    else if VAL_OWN<500000 then VAL_OWN_CAT=5;
+    else if VAL_OWN<1000000 then VAL_OWN_CAT=6;
+    else VAL_OWN_CAT=7;
+run;
+
+data IPUMS.people;
+    set IPUMS.raw_p;
+    /*
+    AGEP_NUM
+    Numeric AGEP
+
+    Recoded from AGEP
+    B0-B9 -> .
+    */
+    if US2021C_AGEP in ("B0" "B1" "B2" "B3" "B4" "B5" "B6" "B7" "B8" "B9") then AGEP_NUM=.;
+    else AGEP_NUM = input(US2021C_AGEP, 2.);
+
+    /*
+    AGEP_CAT
+    Categories of AGEP
+    1: Under 10
+    2: 10-19
+    3: 20-29
+    4: 30-39
+    5: 40-49
+    6: 50-59
+    7: 60-69
+    8: 70-79
+    9: 80 and over
+    */
+    if AGEP_NUM=. then AGEP_CAT=.;
+    else if AGEP_NUM<=9 then AGEP_CAT=1;
+    else if 10<=AGEP_NUM<=19 then AGEP_CAT=2;
+    else if 20<=AGEP_NUM<=29 then AGEP_CAT=3;
+    else if 30<=AGEP_NUM<=39 then AGEP_CAT=4;
+    else if 40<=AGEP_NUM<=49 then AGEP_CAT=5;
+    else if 50<=AGEP_NUM<=59 then AGEP_CAT=6;
+    else if 60<=AGEP_NUM<=69 then AGEP_CAT=7;
+    else if 70<=AGEP_NUM<=79 then AGEP_CAT=8;
+    else if 80<=AGEP_NUM then AGEP_CAT=9;
+
+    /*
+    AGEP_CAT2
+    Categories of AGEP
+    1: Under 18
+    2: 18-64
+    3: 65 and over
+    */
+    if AGEP_NUM=. then AGEP_CAT2=.;
+    else if AGEP_NUM<=17 then AGEP_CAT2=1;
+    else if 18<=AGEP_NUM<=64 then AGEP_CAT2=2;
+    else if 65<=AGEP_NUM then AGEP_CAT2=3;
+
+    /*
+    AGEP_CAT3
+    Categories of AGEP
+    1: Under 18
+    2: 18 and over
+    */
+    if AGEP_NUM=. then AGEP_CAT3=.;
+    else if AGEP_NUM<=17 then AGEP_CAT3=1;
+    else if 18<=AGEP_NUM then AGEP_CAT3=2;
+
+    /*
+    SEX
+    Categories of sex
+    1: Male
+    2: Female
+    */
+    if US2021C_SEX="B" then SEX=.;
+    else if US2021C_SEX="1" then SEX=1;
+    else if US2021C_SEX="2" then SEX=2;
+
+    /*
+    RACE_ETH - combined race and ethnicity
+    1: White
+    2: Black
+    3: American Indian/Alaska Native
+    4: Asian
+    5: Native Hawaiian/Pacific Islander
+    6: Other
+    7: Multiple
+    8: Hispanic
+
+    Recoded from RAC1P
+    1 (White alone) -> 1
+    2 (Black or African American alone) -> 2
+    3 (American Indian alone) -> 3
+    4 (Alaska Native alone) -> 3
+    5 (American Indian and/or Alaska Native tribes alone) -> 3
+    6 (Asian alone) -> 4
+    7 (Native Hawaiian and Other Pacific Islander alone) -> 5
+    8 (Some Other Race alone) -> 6
+    9 (Two or More Races) -> 7
+    . -> .
+    Also recoded from HISP
+    1 (Not Spanish/Hispanic/Latino) -> keep RAC1P recode
+    2-24 (any of 23 different Hispanic categories) -> 8
+    */
+    if US2021C_RAC1P="B" then RACE_ETH=.;
+    else if US2021C_RAC1P="1" then RACE_ETH=1;
+    else if US2021C_RAC1P="2" then RACE_ETH=2;
+    else if US2021C_RAC1P in ("3" "4" "5") then RACE_ETH=3;
+    else if US2021C_RAC1P="6" then RACE_ETH=4;
+    else if US2021C_RAC1P="7" then RACE_ETH=5;
+    else if US2021C_RAC1P="8" then RACE_ETH=6;
+    else if US2021C_RAC1P="9" then RACE_ETH=7;
+    if US2021C_HISP in ("02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "20" "21" "22" "23" "24") then RACE_ETH=8;
+
+    /*
+    PINCP_NUM
+    Numeric PINCP
+    */
+    if US2021C_PINCP="BBBBBBB" then PINCP_NUM=.;
+    else PINCP_NUM=input(US2021C_PINCP, 7.);
+
+    /*
+    INC
+    Personal income adjusted for current year
+    */
+    if PINCP_NUM=. then INC=.;
+    else if US2021C_ADJINCP in (. 0) then INC=.;
+    else INC=PINCP_NUM*US2021C_ADJINCP;
+
+    /*
+    POV
+    Poverty status (POVPIP is read as character, so it is converted explicitly before the numeric comparison)
+    1: Poverty
+    2: Non-poverty
+    */
+    if US2021C_POVPIP="BBB" then POV=.;
+    else if input(US2021C_POVPIP, 3.)<100 then POV=1;
+    else POV=2;
+
+    /*
+    COMM
+    Commute minutes if 16 or older
+    */
+    if US2021C_JWMNP="BBB" then COMM=.;
+    else if AGEP_NUM<16 then COMM=.;
+    else COMM=input(US2021C_JWMNP, 3.);
+
+    /*
+    COMM_CAT
+    Commute categories if 16 or older
+    1: Drove alone
+    2: Carpooled
+    3: Public transit
+    4: Bicycle
+    5: Walked
+    6: Other
+    7: Worked at home
+
+    Recoded from JWTRNS
+    1 (Car, truck, or van) -> 1
+    2 (Bus) -> 3
+    3 (Subway or elevated rail) -> 3
+    4 (Long-distance train or commuter rail) -> 3
+    5 (Light rail, streetcar, or trolley) -> 3
+    6 (Ferryboat) -> 3
+    7 (Taxicab) -> 6
+    8 (Motorcycle) -> 6
+    9 (Bicycle) -> 4
+    10 (Walked) -> 5
+    11 (Worked from home) -> 7
+    12 (Other method) -> 6
+    Also recoded from JWRIP
+    1 (Drove alone) -> 1
+    2-10 (In 2+ person carpool) -> 2
+    */
+    if US2021C_JWTRNS="BB" then COMM_CAT=.;
+    else if US2021C_JWTRNS="01" then COMM_CAT=1;
+    else if US2021C_JWTRNS in ("02" "03" "04" "05" "06") then COMM_CAT=3;
+    else if US2021C_JWTRNS="07" then COMM_CAT=6;
+    else if US2021C_JWTRNS="08" then COMM_CAT=6;
+    else if US2021C_JWTRNS="09" then COMM_CAT=4;
+    else if US2021C_JWTRNS="10" then COMM_CAT=5;
+    else if US2021C_JWTRNS="11" then COMM_CAT=7;
+    else if US2021C_JWTRNS="12" then COMM_CAT=6;
+    if US2021C_JWTRNS="01" and US2021C_JWRIP="01" then COMM_CAT=1;
+    else if US2021C_JWTRNS="01" and US2021C_JWRIP in ("02" "03" "04" "05" "06" "07" "08" "09" "10") then COMM_CAT=2;
+    if AGEP_NUM<16 then COMM_CAT=.;
+
+    /*
+    HHT_CAT
+    House type category
+    1: Married couples
+    2: Male householder
+    3: Female householder
+    4: Non-family
+
+    Recoded from HHT
+    1 (Married couple household) -> 1
+    2 (Other family household: Male householder, no spouse present) -> 2
+    3 (Other family household: Female householder, no spouse present) -> 3
+    4 (Nonfamily household: Male householder: Living alone) -> 4
+    5 (Nonfamily household: Male householder: Not living alone) -> 4
+    6 (Nonfamily household: Female householder: Living alone) -> 4
+    7 (Nonfamily household: Female householder: Not living alone) -> 4
+    */
+    if US2021C_HHT="B" then HHT_CAT=.;
+    else if US2021C_HHT="1" then HHT_CAT=1;
+    else if US2021C_HHT="2" then HHT_CAT=2;
+    else if US2021C_HHT="3" then HHT_CAT=3;
+    else if US2021C_HHT in ("4" "5" "6" "7") then HHT_CAT=4;
+
+    /*
+    MARRIED
+    Marital status if 15 or older
+    1: Married
+    2: Not married
+
+    Recoded from MAR
+    1 (Married) -> 1
+    2 (Widowed) -> 2
+    3 (Divorced) -> 2
+    4 (Separated) -> 2
+    5 (Never married or under 15 years old) -> 2
+    */
+    if US2021C_MAR="B" then MARRIED=.;
+    else if AGEP_NUM=. or AGEP_NUM<15 then MARRIED=.;
+    else if US2021C_MAR="1" then MARRIED=1;
+    else if US2021C_MAR in ("2" "3" "4" "5") then MARRIED=2;
+
+    /*
+    MAR_CAT
+    Categories of marital status if 15 or older
+    1: Never married
+    2: Married
+    3: Divorced or separated
+    4: Widowed
+
+    Recoded from MAR
+    1 (Married) -> 2
+    2 (Widowed) -> 4
+    3 (Divorced) -> 3
+    4 (Separated) -> 3
+    5 (Never married or under 15 years old) -> 1
+    */
+    if US2021C_MAR="B" then MAR_CAT=.;
+    else if AGEP_NUM=. or AGEP_NUM<15 then MAR_CAT=.;
+    else if US2021C_MAR="5" then MAR_CAT=1;
+    else if US2021C_MAR="1" then MAR_CAT=2;
+    else if US2021C_MAR in ("3" "4") then MAR_CAT=3;
+    else if US2021C_MAR="2" then MAR_CAT=4;
+
+    /*
+    BIRTH
+    Gave birth in last year if woman and 15-50
+    1: Gave birth in last year
+    2: Did not give birth in last year
+
+    Recoded from FER
+    1 (Yes) -> 1
+    2 (No) -> 2
+    */
+    if US2021C_FER="B" then BIRTH=.;
+    else if US2021C_FER="1" then BIRTH=1;
+    else if US2021C_FER="2" then BIRTH=2;
+
+    /*
+    AGEP_CAT_BIRTH
+    Age categories if woman and 15-50
+    1: 15-19
+    2: 20-24
+    3: 25-29
+    4: 30-34
+    5: 35-39
+    6: 40-44
+    7: 45-50
+    */
+    if SEX in (. 1) then AGEP_CAT_BIRTH=.;
+    else if AGEP_NUM=. then AGEP_CAT_BIRTH=.;
+    else if 15<=AGEP_NUM<=19 then AGEP_CAT_BIRTH=1;
+    else if 20<=AGEP_NUM<=24 then AGEP_CAT_BIRTH=2;
+    else if 25<=AGEP_NUM<=29 then AGEP_CAT_BIRTH=3;
+    else if 30<=AGEP_NUM<=34 then AGEP_CAT_BIRTH=4;
+    else if 35<=AGEP_NUM<=39 then AGEP_CAT_BIRTH=5;
+    else if 40<=AGEP_NUM<=44 then AGEP_CAT_BIRTH=6;
+    else if 45<=AGEP_NUM<=50 then AGEP_CAT_BIRTH=7;
+
+    /*
+    MIG_CAT
+    moved in last year
+    1: Moved in last year
+    2: Did not move in last year
+
+    Recoded from MIG
+    1 (Yes, same house (nonmovers)) -> 2
+    2 (No, outside US and Puerto Rico) -> 1
+    3 (No, different house in US or Puerto Rico) -> 1
+    */
+    if US2021C_MIG="B" then MIG_CAT=.;
+    else if US2021C_MIG="1" then MIG_CAT=2;
+    else if US2021C_MIG in ("2" "3") then MIG_CAT=1;
+
+    /*
+    MIGSP_NUM
+    Numeric MIGSP
+    */
+    if US2021C_MIGSP="BBB" then MIGSP_NUM=.;
+    else MIGSP_NUM=input(US2021C_MIGSP, 3.);
+
+    /*
+    MIGPUMA_NUM
+    Numeric MIGPUMA
+    */
+    if US2021C_MIGPUMA="BBBBB" then MIGPUMA_NUM=.;
+    else MIGPUMA_NUM=input(US2021C_MIGPUMA, 5.);
+
+    /*
+    MIG_CAT2
+    lived last year categories
+    1: Same house year ago
+    2: From same county
+    3: From different county
+    4: From different state
+    5: From abroad
+
+    Recoded from MIG
+    1 (Yes, same house (nonmovers)) -> 1
+    2 (No, outside US and Puerto Rico) -> 5
+    3 (No, different house in US or Puerto Rico) ...
+    - If MIGSP~=17 (Illinois) -> 4
+    - If MIGSP=17 but MIGPUMA~=03400 (Cook county) -> 3
+    - If MIGSP=17 and MIGPUMA=03400 -> 2
+    */
+    if US2021C_MIG="B" then MIG_CAT2=.;
+    else if US2021C_MIG="1" then MIG_CAT2=1;
+    else if US2021C_MIG="3" and MIGSP_NUM=17 and MIGPUMA_NUM=3400 then MIG_CAT2=2;
+    else if US2021C_MIG="3" and MIGSP_NUM=17 then MIG_CAT2=3;
+    else if US2021C_MIG="3" then MIG_CAT2=4;
+    else if US2021C_MIG="2" then MIG_CAT2=5;
+
+    /*
+    HS (missing if under 25)
+    1: High school graduate
+    2: Not a high school graduate
+
+    Recoded from SCHL
+    1-15 (up to 12th grade no diploma) -> 2
+    16-24 (Regular high school diploma or more) -> 1
+    */
+    if US2021C_SCHL="BB" then HS=.;
+    else if AGEP_NUM<25 then HS=.;
+    else if US2021C_SCHL in ("01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15") then HS=2;
+    else if US2021C_SCHL in ("16" "17" "18" "19" "20" "21" "22" "23" "24") then HS=1;
+
+    /*
+    COLLEGE (missing if under 25)
+    1: Bachelor's degree
+    2: No bachelor's degree
+
+    Recoded from SCHL
+    1-20 (up to Associate's degree) -> 2
+    21-24 (Bachelor's degree or more) -> 1
+    */
+    if US2021C_SCHL="BB" then COLLEGE=.;
+    else if AGEP_NUM<25 then COLLEGE=.;
+    else if US2021C_SCHL in ("01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "20") then COLLEGE=2;
+    else if US2021C_SCHL in ("21" "22" "23" "24") then COLLEGE=1;
+
+    /*
+    SCHL_CAT
+    Education categories if 25 or older
+    1: No degree
+    2: High school
+    3: Some college
+    4: Bachelor's
+    5: Post-grad
+
+    Recoded from SCHL
+    1-15 (up to 12th grade no diploma) -> 1
+    16 (Regular high school diploma) -> 2
+    17 (GED or alternative credential) -> 2
+    18 (Some college, but less than 1 year) -> 3
+    19 (1 or more years of college credit, no degree) -> 3
+    20 (Associate's degree) -> 3
+    21 (Bachelor's degree) -> 4
+    22 (Master's degree) -> 5
+    23 (Professional degree beyond a bachelor's degree) -> 5
+    24 (Doctorate degree) -> 5
+    */
+    if US2021C_SCHL="BB" then SCHL_CAT=.;
+    else if AGEP_NUM<25 then SCHL_CAT=.;
+    else if US2021C_SCHL in ("01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15") then SCHL_CAT=1;
+    else if US2021C_SCHL in ("16" "17") then SCHL_CAT=2;
+    else if US2021C_SCHL in ("18" "19" "20") then SCHL_CAT=3;
+    else if US2021C_SCHL in ("21") then SCHL_CAT=4;
+    else if US2021C_SCHL in ("22" "23" "24") then SCHL_CAT=5;
+
+    /*
+    LAN_CAT
+    Language spoken in home categories
+    1: Language other than English spoken at home
+    2: English only
+
+    Recoded from LANX
+    1 (Yes, speaks another language) -> 1
+    2 (No, speaks only English) -> 2
+    */
+    if US2021C_LANX="B" then LAN_CAT=.;
+    else if US2021C_LANX="1" then LAN_CAT=1;
+    else if US2021C_LANX="2" then LAN_CAT=2;
+
+    /*
+    LANP_NUM
+    Numeric LANP
+    */
+    if US2021C_LANP="BBBB" then LANP_NUM=.;
+    else LANP_NUM=input(US2021C_LANP,4.);
+
+    /*
+    LAN_CAT2
+    Language spoken in home categories
+    1: English only
+    2: Spanish
+    3: Indo-European
+    4: Asian/Islander
+    5: Other
+
+    Recoded from LANX
+    2 (No, speaks only English) -> 1
+    Also recoded from LANP (compared through LANP_NUM; codes outside the ranges below stay missing)
+    1200-1205 -> 2
+    1053-1056, 1069-1073, 1110-1564, 1711-1799 -> 3
+    1643-1710, 1800-3798 -> 4
+    1000-1052, 1057-1063, 1074-1109, 1565-1642, 3799-9499, 9600-9999 -> 5
+    */
+    if US2021C_LANX="B" then LAN_CAT2=.;
+    else if US2021C_LANX="2" then LAN_CAT2=1;
+    else if 1200<=LANP_NUM<=1205 then LAN_CAT2=2;
+    else if 1053<=LANP_NUM<=1056 or 1069<=LANP_NUM<=1073 or 1110<=LANP_NUM<=1564 or 1711<=LANP_NUM<=1799 then LAN_CAT2=3;
+    else if 1643<=LANP_NUM<=1710 or 1800<=LANP_NUM<=3798 then LAN_CAT2=4;
+    else if 1000<=LANP_NUM<=1052 or 1057<=LANP_NUM<=1063 or 1074<=LANP_NUM<=1109 or 1565<=LANP_NUM<=1642 or 3799<=LANP_NUM<=9499 or 9600<=LANP_NUM<=9999 then LAN_CAT2=5;
+
+    /*
+    FOREIGN
+    Foreign-born status
+    1: Foreign-born
+    2: Not foreign-born
+
+    Recoded from NATIVITY
+    1 (Native) -> 2
+    2 (Foreign born) -> 1
+    */
+    if US2021C_NATIVITY="B" then FOREIGN=.;
+    else if US2021C_NATIVITY="1" then FOREIGN=2;
+    else if US2021C_NATIVITY="2" then FOREIGN=1;
+
+    /*
+    POBP_NUM
+    Numeric POBP if foreign-born
+    */
+    if US2021C_POBP="BBB" then POBP_NUM=.;
+    else if US2021C_NATIVITY="1" then POBP_NUM=.;
+    else POBP_NUM=input(US2021C_POBP, 3.);
+
+    /*
+    POBP_CAT
+    Place of birth categories if foreign-born
+    1: Europe
+    2: Asia
+    3: Africa
+    4: Oceania
+    5: Latin America
+    6: North America (code 301 only; NOTE(review): code 300, Bermuda, is binned as Latin America below - confirm this is intended)
+    */
+    if POBP_NUM=. then POBP_CAT=.;
+    else if 100<=POBP_NUM<=169 then POBP_CAT=1;
+    else if 200<=POBP_NUM<=254 then POBP_CAT=2;
+    else if POBP_NUM=300 or 302<=POBP_NUM<=399 then POBP_CAT=5;
+    else if POBP_NUM=301 then POBP_CAT=6;
+    else if 400<=POBP_NUM<=469 then POBP_CAT=3;
+    else if 501<=POBP_NUM<=554 then POBP_CAT=4;
+
+    /*
+    MLPJ2
+    WW2 Veteran status
+    1: Veteran
+    */
+    if US2021C_MLPJ in ("B" "0") then MLPJ2=.;
+    else if US2021C_MLPJ="1" then MLPJ2=1;
+
+    /*
+    MLPH2
+    Korea Veteran status
+    1: Veteran
+    */
+    if US2021C_MLPH in ("B" "0") then MLPH2=.;
+    else if US2021C_MLPH="1" then MLPH2=1;
+
+    /*
+    MLPFG2
+    February 1955 - July 1964 Veteran status
+    1: Veteran
+    */
+    if US2021C_MLPFG in ("B" "0") then MLPFG2=.;
+    else if US2021C_MLPFG="1" then MLPFG2=1;
+
+    /*
+    MLPE2
+    Vietnam Veteran status
+    1: Veteran
+    */
+    if US2021C_MLPE in ("B" "0") then MLPE2=.;
+    else if US2021C_MLPE="1" then MLPE2=1;
+
+    /*
+    MLPCD2
+    May 1975 - July 1990 Veteran status
+    1: Veteran
+    */
+    if US2021C_MLPCD in ("B" "0") then MLPCD2=.;
+    else if US2021C_MLPCD="1" then MLPCD2=1;
+
+    /*
+    MLPB2
+    Gulf (1990s) Veteran status
+    1: Veteran
+    */
+    if US2021C_MLPB in ("B" "0") then MLPB2=.;
+    else if US2021C_MLPB="1" then MLPB2=1;
+
+    /*
+    MLPA2
+    Gulf (2001-) Veteran status
+    1: Veteran
+    */
+    if US2021C_MLPA in ("B" "0") then MLPA2=.;
+    else if US2021C_MLPA="1" then MLPA2=1;
+
+    /*
+    VET
+    Veteran status (everyone without a period-of-service flag, including records where all flags are missing, is coded 2)
+    1: Veteran
+    2: Not veteran
+    */
+    if MLPJ2=1 or MLPH2=1 or MLPFG2=1 or MLPE2=1 or MLPCD2=1 or MLPB2=1 or MLPA2=1 then VET=1;
+    else VET=2;
+run;
+
+proc summary data=IPUMS.people nway;
+    class SERIAL;
+    var INC;
+    output out=IPUMS.sumpeople sum=HHINC;
+run;
+
+data IPUMS.mergehouses;
+    /*
+    HHINC
+    Household income (unweighted sum of INC over the persons sharing a SERIAL; an exact zero is set to missing)
+
+    Merged from an aggregation of the person-level data table (BY-merge: assumes IPUMS.houses is already sorted by SERIAL - TODO confirm)
+    */
+    merge
+        IPUMS.houses (keep=SERIAL HHWT STRATA REPWT1-REPWT80 in=HOUSE)
+        IPUMS.sumpeople (keep=SERIAL HHINC);
+    by SERIAL;
+    if HOUSE;
+    if HHINC=0 then HHINC=.;
+
+    /*
+    HHINC_CAT
+    Household income categories
+    1: Under $50K
+    2: $50K - $100K
+    3: $100K - $200K
+    4: Over $200K
+    */
+    if HHINC=. then HHINC_CAT=.;
+    else if HHINC<50000 then HHINC_CAT=1;
+    else if 50000<=HHINC<100000 then HHINC_CAT=2;
+    else if 100000<=HHINC<200000 then HHINC_CAT=3;
+    else if 200000<=HHINC then HHINC_CAT=4;
+run;
+
+proc datasets library=IPUMS nolist;
+    delete sumpeople;
+run;
+
+proc format;
+    value SEX_f 1="Male"
+        2="Female";
+
+    value AGEP_CAT_f 1="0-9"
+        2="10-19"
+        3="20-29"
+        4="30-39"
+        5="40-49"
+        6="50-59"
+        7="60-69"
+        8="70-79"
+        9="80+";
+
+    value AGEP_CAT2_f 1="Under 18"
+        2="18 to 64"
+        3="65 and over";
+
+    value AGEP_CAT3_f 1="Under 18"
+        2="18 and over";
+
+    value RACE_ETH_f 1="White"
+        2="Black"
+        3="Native"
+        4="Asian"
+        5="Islander"
+        6="Other"
+        7="Two+"
+        8="Hispanic";
+
+    value HHT_CAT_f 1="Married couples"
+        2="Male householder"
+        3="Female householder"
+        4="Non-family";
+
+    value MARRIED_f 1="Married"
+        2="Single";
+
+    value MAR_CAT_f 1="Never married"
+        2="Now married"
+        3="Divorced"
+        4="Widowed";
+
+    value BIRTH_f 1="Gave birth in last year"
+        2="Did not give birth in last year";
+
+    value AGEP_CAT_BIRTH_f 1="15-19"
+        2="20-24"
+        3="25-29"
+        4="30-34"
+        5="35-39"
+        6="40-44"
+        7="45-50";
+
+    value VACANT_f 1="Occuped"
+        2="Vacant";
+
+    value OWN_f 1="Owner occupied"
+        2="Renter occupied";
+
+    value VAL_OWN_CAT_f 1="Under $100K"
+        2="$100K-$200K"
+        3="$200K-$300K"
+        4="$300K-$400K"
+        5="$400K-$500K"
+        6="$500K-$1M"
+        7="Over $1M";
+
+    value HS_f 1="High school grad or higher"
+        2="Not a high school grad";
+
+    value COLLEGE_f 1="Bachelor's degree or higher"
+        2="No Bachelor's degree";
+
+    value SCHL_CAT_f
+        1="No degree" 2="High school"
+        3="Some college"
+        4="Bachelor's"
+        5="Post-grad";
+
+    value LAN_CAT_f
+        1="Language other than English spoken at home" 2="English only";
+
+    value LAN_CAT2_f
+        1="English only" 2="Spanish"
+        3="Indo-European"
+        4="Asian/Islander"
+        5="Other";
+
+    value FB_f
+        1="Foreign-born" 2="Not foreign-born";
+
+    value POBP_CAT_f
+        1="Europe" 2="Asia"
+        3="Africa"
+        4="Oceania"
+        5="Latin America"
+        6="North America";
+
+    value VET_f
+        1="Veteran" 2="Not a veteran";
+
+    value HHINC_CAT_f
+        1="Under $50K" 2="$50K - $100K"
+        3="$100K - $200K"
+        4="Over $200K";
+
+    value POV_f
+        1="Below poverty line" 2="Above poverty line";
+
+    value COMM_CAT_f
+        1="Drove alone" 2="Carpooled"
+        3="Public transit"
+        4="Bicycle"
+        5="Walked"
+        6="Other"
+        7="Worked at home";
+
+    value BLD_CAT_f
+        1="Single unit" 2="Multi-unit"
+        3="Mobile home"
+        4="Boat, RV, van, etc.";
+
+    value MIG_CAT_f
+        1="Moved in last year" 2="Did not move in last year";
+
+    value MIG_CAT2_f
+        1="Same house year ago" 2="From same county"
+        3="From different county"
+        4="From different state"
+        5="From abroad";
+run;
diff --git a/src/03_Demographics.sas b/src/03_Demographics.sas
new file mode 100644
index 0000000..36ed4cd
--- /dev/null
+++ b/src/03_Demographics.sas
@@ -0,0 +1,42 @@
+libname IPUMS "/home/u44593168";
+
+/* Population: sum of the person weights */
+proc means data=IPUMS.people sum;
+    var PERWT;
+run;
+
+/* Median age, weighted by person weight */
+proc means data=IPUMS.people median maxdec=1;
+    weight PERWT;
+    var AGEP_NUM;
+run;
+
+/* Population by ten-year age range (weighted percent bar chart) */
+ods select freqplot;
+proc freq data=IPUMS.people;
+    weight PERWT;
+    format AGEP_CAT AGEP_CAT_f.;
+    tables AGEP_CAT / plots=freqplot(orient=vertical scale=percent);
+run;
+
+/* Population by broad age category (donut chart) */
+proc gchart data=IPUMS.people;
+    format AGEP_CAT2 AGEP_CAT2_f.;
+    donut AGEP_CAT2 / discrete clockwise slice=outside value=none percent=outside freq=PERWT;
+run;
+quit;
+
+/* Population by sex (donut chart) */
+proc gchart data=IPUMS.people;
+    format SEX SEX_f.;
+    donut SEX / discrete clockwise slice=outside value=none percent=outside freq=PERWT;
+run;
+quit;
+
+/* Population by combined race and ethnicity (weighted percent bar chart) */
+ods select freqplot;
+proc freq data=IPUMS.people;
+    weight PERWT;
+    format RACE_ETH RACE_ETH_f.;
+    tables RACE_ETH / plots=freqplot(orient=vertical scale=percent);
+run;
diff --git a/src/04_Economics.sas b/src/04_Economics.sas
new file mode 100644
index 0000000..0a179d0
--- /dev/null
+++ b/src/04_Economics.sas
@@ -0,0 +1,73 @@
+libname IPUMS "/home/u44593168";
+
+/* Per capita income: weighted sum of INC divided by the sum of person weights */
+proc means data=IPUMS.people sum noprint;
+    weight PERWT;
+    var INC;
+    output out=IPUMS.sumproduct sum=GDP;
+run;
+proc means data=IPUMS.people sum noprint;
+    var PERWT;
+    output out=IPUMS.sumpopulation sum=POP;
+run;
+data IPUMS.productpercapita;
+    merge
+        IPUMS.sumproduct (keep=GDP)
+        IPUMS.sumpopulation (keep=POP);
+    INCOME_PER_CAPITA=GDP/POP;
+run;
+proc print data=IPUMS.productpercapita;
+    var INCOME_PER_CAPITA;
+run;
+proc datasets library=IPUMS nolist;
+    delete sumproduct sumpopulation productpercapita;
+run;
+
+/* Median household income, weighted by household weight */
+proc means data=IPUMS.mergehouses median;
+    weight HHWT;
+    var HHINC;
+run;
+
+/* Households by income bracket (weighted percent bar chart) */
+ods select freqplot;
+proc freq data=IPUMS.mergehouses;
+    weight HHWT;
+    format HHINC_CAT HHINC_CAT_f.;
+    tables HHINC_CAT / plots=freqplot(orient=vertical scale=percent);
+run;
+
+/* Persons below poverty line (weighted table and bar chart) */
+proc freq data=IPUMS.people;
+    weight PERWT;
+    format POV POV_f.;
+    tables POV / plots=freqplot(orient=vertical scale=percent);
+run;
+
+/* Persons below poverty line - Children (Under 18) - Seniors (65 and over) */
+proc sort data=IPUMS.people
+    out=IPUMS.sortpeople;
+    by AGEP_CAT2;
+run;
+proc gchart data=IPUMS.sortpeople;
+    format POV POV_f.
AGEP_CAT2 AGEP_CAT2_f.; + by AGEP_CAT2; + donut POV / discrete clockwise slice=outside value=none percent=outside freq=PERWT; +run; +quit; +proc datasets library=IPUMS nolist; + delete sortpeople; +run; + +/* Mean travel time to work */ +proc means data=IPUMS.people mean; + var COMM; + weight PERWT; +run; + +/* Means of transportation to work */ +proc freq data=IPUMS.people; + format COMM_CAT COMM_CAT_f.; + tables COMM_CAT / plots=freqplot(orient=vertical scale=percent); + weight PERWT; +run; diff --git a/src/05_Families.sas b/src/05_Families.sas new file mode 100644 index 0000000..5fd76a4 --- /dev/null +++ b/src/05_Families.sas @@ -0,0 +1,57 @@ +libname IPUMS "/home/u44593168"; + +/* Number of households */ +proc means data=IPUMS.houses sum; + var HHWT; +run; + +/* Persons per household */ +proc means data=IPUMS.houses mean maxdec=1; + var NP_NUM; + weight HHWT; +run; + +/* Population by household type */ +proc gchart data=IPUMS.people; + format HHT_CAT HHT_CAT_f.; + donut HHT_CAT / discrete clockwise slice=outside value=none percent=outside freq=PERWT; +run; +quit; + +/* Marital Status */ +proc gchart data=IPUMS.people; + format MARRIED MARRIED_f.; + donut MARRIED / discrete clockwise slice=outside value=none percent=outside freq=PERWT; +run; +quit; + +/* Marital status, by sex */ +proc sort data=IPUMS.people + out=IPUMS.sortpeople; + by SEX; +run; +ods select freqplot; +proc freq data=IPUMS.sortpeople; + format SEX SEX_f. MAR_CAT MAR_CAT_f.; + by SEX; + tables MAR_CAT / plots=freqplot(orient=vertical scale=percent); + weight PERWT; +run; +proc datasets library=IPUMS nolist; + delete sortpeople; +run; + +/* Fertility */ +proc freq data=IPUMS.people; + format BIRTH BIRTH_f.; + tables BIRTH; + weight PERWT; +run; + +/* Women who gave birth during past year, by age group */ +ods select freqplot; +proc freq data=IPUMS.people; + format BIRTH BIRTH_f. 
AGEP_CAT_BIRTH AGEP_CAT_BIRTH_f.; + tables BIRTH*AGEP_CAT_BIRTH / plots=freqplot(orient=vertical scale=grouppercent); + weight PERWT; +run; diff --git a/src/06_Housing.sas b/src/06_Housing.sas new file mode 100644 index 0000000..f2615ec --- /dev/null +++ b/src/06_Housing.sas @@ -0,0 +1,56 @@ +libname IPUMS "/home/u44593168"; + +/* Number of housing units */ + +/* Occupied vs. Vacant */ +proc gchart data=IPUMS.houses; + format VACANT VACANT_f.; + donut VACANT / discrete clockwise slice=outside value=none percent=outside freq=HHWT; +run; +quit; + +/* Ownership of occupied units */ +proc gchart data=IPUMS.houses; + format OWN OWN_f.; + donut OWN / discrete clockwise slice=outside value=none percent=outside freq=HHWT; +run; +quit; + +/* Types of structure */ +proc gchart data=IPUMS.houses; + format BLD_CAT BLD_CAT_f.; + donut BLD_CAT / discrete clockwise slice=outside value=none percent=outside freq=HHWT; +run; +quit; + +/* Year moved in, by percentage of population */ +/* NOTE: Continuous data not available publicly */ + +/* Median value of owner-occupied housing units */ +proc means data=IPUMS.houses median; + var VAL_OWN; + weight HHWT; +run; + +/* Value of owner-occupied housing units */ +ods select freqplot; +proc freq data=IPUMS.houses; + format VAL_OWN_CAT VAL_OWN_CAT_f.; + tables VAL_OWN_CAT / plots=freqplot(orient=vertical scale=percent); + weight HHWT; +run; + +/* Moved since previous year */ +proc freq data=IPUMS.people; + format MIG_CAT MIG_CAT_f.; + tables MIG_CAT; + weight PERWT; +run; + +/* Population migration since previous year */ +ods select freqplot; +proc freq data=IPUMS.people; + format MIG_CAT2 MIG_CAT2_f.; + tables MIG_CAT2 / plots=freqplot(orient=vertical scale=percent); + weight PERWT; +run; diff --git a/src/07_Social.sas b/src/07_Social.sas new file mode 100644 index 0000000..ae7bc13 --- /dev/null +++ b/src/07_Social.sas @@ -0,0 +1,81 @@ +libname IPUMS "/home/u44593168"; + +/* High school grad or higher */ +proc freq data=IPUMS.people; + 
format HS HS_f.; + tables HS; + weight PERWT; +run; + +/* Bachelor's degree or higher */ +proc freq data=IPUMS.people; + format COLLEGE COLLEGE_f.; + tables COLLEGE; + weight PERWT; +run; + +/* Population by highest level of education */ +ods select freqplot; +proc freq data=IPUMS.people; + format SCHL_CAT SCHL_CAT_f.; + tables SCHL_CAT / plots=freqplot(orient=vertical scale=percent); + weight PERWT; +run; + +/* Persons with language other than English spoken at home */ +proc freq data=IPUMS.people; + format LAN_CAT LAN_CAT_f.; + tables LAN_CAT; + weight PERWT; +run; + +/* Language at home, children 5-17 - adults 18+ */ +proc sort data=IPUMS.people + out=IPUMS.sortpeople; + by AGEP_CAT3; +run; +ods select freqplot; +proc freq data=IPUMS.sortpeople; + format LAN_CAT2 LAN_CAT2_f. AGEP_CAT3 AGEP_CAT3_f.; + by AGEP_CAT3; + tables LAN_CAT2 / plots=freqplot(orient=vertical scale=percent); + weight PERWT; +run; +proc datasets library=IPUMS nolist; + delete sortpeople; +run; + +/* Foreign-born population */ +proc freq data=IPUMS.people; + format FOREIGN FOREIGN_f.; + tables FOREIGN; + weight PERWT; +run; + +/* Place of birth for foreign-born population */ +ods select freqplot; +proc freq data=IPUMS.people; + format POBP_CAT POBP_CAT_f.; + tables POBP_CAT / plots=freqplot(orient=vertical scale=percent); + weight PERWT; +run; + +/* Veteran status */ +proc freq data=IPUMS.people; + format VET VET_f.; + tables VET; + weight PERWT; +run; + +/* Veterans by wartime service */ +proc freq data=IPUMS.people; + tables MLPJ2 MLPH2 MLPE2 MLPB2 MLPA2; + weight PERWT; +run; + +/* Total veterans - Male - Female */ +proc freq data=IPUMS.people; + format VET VET_f. SEX SEX_f.; + tables VET*SEX / nopercent nocol norow nocum; + weight PERWT; +run; -- 2.43.4