extract all numbers from a string

Thanks *VERY* much, this is great!

I realized there are a few more cases to handle. I think I've got
something that covers all the possibilities now:


# Worked example: pull every number out of a free-text string.
#
# The alternatives below are listed longest form first. PCRE (perl = TRUE)
# and ICU (the engine behind current stringr) take the FIRST alternative
# that matches at a position, not the longest one. If "(\\d+)" came first,
# "32.1" would be split into 32 and 1, and "0.3523e-10" into 0, 3523, -10.
# Positive and negative forms never compete (one starts with a digit, the
# other with "-"), so only the order within each sign matters.
patternslist <- c(
  # floats in scientific notation
  "(-\\d+\\.\\d+e-\\d+)",  # negative float, scientific w. negative power
  "(-\\d+\\.\\d+e\\d+)",   # negative float, scientific w. positive power
  "(\\d+\\.\\d+e-\\d+)",   # positive float, scientific w. negative power
  "(\\d+\\.\\d+e\\d+)",    # positive float, scientific w. positive power
  # integers in scientific notation
  "(-\\d+e-\\d+)",         # negative int, scientific w. negative power
  "(-\\d+e\\d+)",          # negative int, scientific w. positive power
  "(\\d+e-\\d+)",          # positive int, scientific w. negative power
  "(\\d+e\\d+)",           # positive int, scientific w. positive power
  # plain floats
  "(-\\d+\\.\\d+)",        # negative float
  "(\\d+\\.\\d+)",         # positive float
  # plain integers (must come last: they are prefixes of every form above)
  "(-\\d+)",               # negative integer
  "(\\d+)"                 # positive integer
)

pattern <- paste(patternslist, collapse = "|")
pattern

# A simple string
tmpstr <- paste(
  "The first number is: 32.  Another one is: 32.1.",
  "Here's a number in scientific format, 0.3523e10, and",
  "another, 0.3523e-10, and a negative, -313.1"
)

# Base R extraction: gregexpr() locates every match, regmatches() pulls the
# matched substrings out, and [[1]] selects the result for the one input
# string. With stringr loaded, str_extract_all(tmpstr, pattern)[[1]] gives
# the same substrings for this ordered pattern.
simple_nums <- as.numeric(
  regmatches(tmpstr, gregexpr(pattern, tmpstr, perl = TRUE))[[1]]
)
simple_nums
# 32, 32.1, 0.3523e10, 0.3523e-10, -313.1

# A more complex string
tmpstr <- paste(
  "The first number is: 32.  342 342.1   -3234e-10",
  "3234e-1 Another one is: 32.1. Here's a number in scientific",
  "format, 0.3523e10, and another, 0.3523e-10, and a negative,",
  "-313.1"
)

complex_nums <- as.numeric(
  regmatches(tmpstr, gregexpr(pattern, tmpstr, perl = TRUE))[[1]]
)
complex_nums
# 32, 342, 342.1, -3234e-10, 3234e-1, 32.1, 0.3523e10, 0.3523e-10, -313.1



Cheers!
Nick


PS: A function version:


# Extract numbers / get numbers / get all numbers from a text string.
#
# Recognized forms: an optional leading minus sign, an integer or decimal
# mantissa, and an optional exponent written "e<digits>" or "e-<digits>":
#   32   -32   32.1   -313.1   0.3523e10   0.3523e-10   3234e-1   -3234e-10
#
# Args:
#   tmpstr: character string to scan. Only the first element is examined.
#
# Returns:
#   Numeric vector of the numbers found, in order of appearance;
#   numeric(0) when the string contains no digits.
#
# Example:
#   getnums("The first number is: 32.  342 342.1   -3234e-10 3234e-1")
#   # 32, 342, 342.1, -3.234e-07, 323.4
getnums <- function(tmpstr) {
  # One pattern with optional parts replaces the former alternation of
  # twelve fixed forms (sign x int/float x no/positive/negative exponent).
  # The alternation listed "(\\d+)" first; engines that take the first
  # matching alternative (PCRE, ICU) then split "32.1" into 32 and 1.
  # Greedy optional groups always consume the longest valid number.
  pattern <- "-?\\d+(\\.\\d+)?(e-?\\d+)?"

  # Get the numbers: gregexpr() locates every match, regmatches() extracts
  # the matched substrings, [[1]] selects the result for the first string.
  # Base R only, so no package is attached as a side effect of the call.
  matched <- regmatches(tmpstr, gregexpr(pattern, tmpstr, perl = TRUE))[[1]]

  # Return them
  as.numeric(matched)
}












On 6/15/13 10:46 PM, arun wrote:
>
>
> HI,
> One way would be:
>
> library(stringr)
> tmpstr = "The first number is: 32.  Another one is: 32.1.
> Here's a number in scientific format, 0.3523e10, and
> another, 0.3523e-10, and a negative, -313.1"
> pattern<- "(\\d)+|(\\d+\\.\\d+)|(-\\d+\\.\\d+)|(\\d+.\\d+e\\d+)|(\\d+\\.\\d+e-\\d+)"
> str_extract_all(tmpstr,pattern)[[1]]
> #[1] "32"         "32.1"       "0.3523e10"  "0.3523e-10" "-313.1"
>   as.numeric(str_extract_all(tmpstr,pattern)[[1]])
> A.K.
>
>
>
> ----- Original Message -----
> From: Nick Matzke <matzke at berkeley.edu>
> To: R-help at r-project.org
> Cc:
> Sent: Sunday, June 16, 2013 1:06 AM
> Subject: [R] extract all numbers from a string
>
> Hi all,
>
> I have been beating my head against this problem for a bit,
> but I can't figure it out.
>
> I have a series of strings of variable length, and each will
> have one or more numbers, of varying format.  E.g., I might
> have:
>
>
> tmpstr = "The first number is: 32.  Another one is: 32.1.
> Here's a number in scientific format, 0.3523e10, and
> another, 0.3523e-10, and a negative, -313.1"
>
> How could I get R to just give me a list of numerics
> containing the numbers therein?
>
> Thanks very much to the regexp wizards!
>
> Cheers,
> Nick
>
>
>