# dateregex - a regular expression to find dates in text,
# taken from Regular Expressions for Language Engineering,
# Karttunen, Chanod, Grefenstette, 1997, Rank Xerox Research Center, France
# Perl can handle it.
#
# Reads the sample text from the __DATA__ section below, echoes it, and then
# prints it again (prefixed with "text: ") with every recognised date
# expression wrapped in square brackets.

use strict;
use warnings;

# Building blocks. Each one is compiled with qr// so it can be embedded in a
# larger pattern. Plain single-quoted strings only interpolate one level deep:
# a string containing '$day' ends up in the regex as the anchor '$' followed
# by the literal text 'day', and therefore never matches.
my $SP    = qr/,\s+/;
my $day   = qr/(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)/;
my $month = qr/(?:January|February|March|April|May|June|July|August|September|October|November|December)/;

# Day of month, 1-31. Alternation is ordered, so the two-digit forms must be
# tried before the single digit; otherwise "31" is matched as "3". The
# lookahead stops the match from ending in the middle of a longer number.
my $date  = qr/(?:3[01]|[12][0-9]|[1-9])(?![0-9])/;

# Four-digit year without a leading zero, not followed by a further digit.
my $year  = qr/[1-9][0-9]{3}(?![0-9])/;

# Optional "<day>, " prefix, month name, day of month, optional ", <year>".
my $long_date = qr/(?:$day$SP)?$month\s+$date(?:$SP$year)?/;

# A full date or a bare day name. The long form comes first so that
# "Sunday, August 11" is taken as one expression instead of stopping at
# "Sunday".
my $date_expression = qr/$long_date|$day/;

# mark_dates($input)
#   Returns a copy of $input in which every date expression is enclosed in
#   square brackets. The argument itself is not modified.
sub mark_dates {
    my ($input) = @_;
    (my $marked = $input) =~ s/($date_expression)/\[$1\]/g;
    return $marked;
}

# Each line read from DATA keeps its newline, so joining with a space leaves
# a leading blank on every line after the first.
my @text = <DATA>;
my $text = join(" ", @text);
print $text;

$text = mark_dates($text);

# NOTE(review): the source this was recovered from does not make clear whether
# the following experiment (annotated "yes") was active; it is kept disabled.
# $text =~ s/($year|$date)/\[$1\]/g;

print "text: ", $text;

# regular expression symbols
# (presumably the notation used in the cited paper - TODO confirm):
# 0 - epsilon, empty string,
# ? - any
# # - boundary marker, refers to either the beginning or end of a string
# % - the escape character
__DATA__
Sunday August 11
Sunday, August 11
August 11, 1996
Sunday, August 11, 1996
1
2
Tuesday
3
Monday